]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/xtube.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class XTubeIE(InfoExtractor
): 
  20                             https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-) 
  27         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', 
  28         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', 
  32             'title': 'strange erotica', 
  33             'description': 'contains:an ET kind of thing', 
  34             'uploader': 'greenshowers', 
  42         'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', 
  43         'only_matching': True, 
  45         'url': 'xtube:625837', 
  46         'only_matching': True, 
  49     def _real_extract(self
, url
): 
  50         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  51         video_id 
= mobj
.group('id') 
  52         display_id 
= mobj
.group('display_id') 
  56             url 
= 'http://www.xtube.com/watch.php?v=%s' % video_id
 
  58         req 
= sanitized_Request(url
) 
  59         req
.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') 
  60         webpage 
= self
._download
_webpage
(req
, display_id
) 
  62         sources 
= self
._parse
_json
(self
._search
_regex
( 
  63             r
'sources\s*:\s*({.+?}),', webpage
, 'sources'), video_id
) 
  66         for format_id
, format_url 
in sources
.items(): 
  69                 'format_id': format_id
, 
  70                 'height': int_or_none(format_id
), 
  72         self
._sort
_formats
(formats
) 
  74         title 
= self
._search
_regex
( 
  75             (r
'<h1>(?P<title>[^<]+)</h1>', r
'videoTitle\s*:\s*(["\'])(?P
<title
>.+?
)\
1'), 
  76             webpage, 'title
', group='title
') 
  77         description = self._search_regex( 
  78             r'</h1
>\s
*<p
>([^
<]+)', webpage, 'description
', fatal=False) 
  79         uploader = self._search_regex( 
  80             (r'<input[^
>]+name
="contentOwnerId"[^
>]+value
="([^"]+)"', 
  81              r'<span[^>]+class="nickname
"[^>]*>([^<]+)'), 
  82             webpage, 'uploader', fatal=False) 
  83         duration = parse_duration(self._search_regex( 
  84             r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>', 
  85             webpage, 'duration', fatal=False)) 
  86         view_count = str_to_int(self._search_regex( 
  87             r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>', 
  88             webpage, 'view count', fatal=False)) 
  89         comment_count = str_to_int(self._html_search_regex( 
  90             r'>Comments? \(([\d,\.]+)\)<', 
  91             webpage, 'comment count', fatal=False)) 
  95             'display_id': display_id, 
  97             'description': description, 
 100             'view_count': view_count, 
 101             'comment_count': comment_count, 
 107 class XTubeUserIE(InfoExtractor): 
 108     IE_DESC = 'XTube user profile' 
 109     _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)' 
 111         'url': 'http://www.xtube.com/profile/greenshowers-4056496', 
 113             'id': 'greenshowers-4056496', 
 116         'playlist_mincount': 155, 
 119     def _real_extract(self, url): 
 120         user_id = self._match_id(url) 
 123         for pagenum in itertools.count(1): 
 124             request = sanitized_Request( 
 125                 'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum), 
 127                     'Cookie': 'popunder=4', 
 128                     'X-Requested-With': 'XMLHttpRequest', 
 132             page = self._download_json( 
 133                 request, user_id, 'Downloading videos JSON page %d' % pagenum) 
 135             html = page.get('html') 
 139             for video_id in orderedSet([video_id for _, video_id in re.findall( 
 140                     r'data-plid=(["\'])(.+?
)\
1', html)]): 
 141                 entries.append(self.url_result('xtube
:%s' % video_id, XTubeIE.ie_key())) 
 143             page_count = int_or_none(page.get('pageCount
')) 
 144             if not page_count or pagenum == page_count: 
 147         playlist = self.playlist_result(entries, user_id) 
 148         playlist['age_limit
'] = 18