]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/fourtube.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  15 class FourTubeIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)' 
  20         'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', 
  21         'md5': '6516c8ac63b03de06bc8eac14362db4f', 
  25             'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', 
  26             'uploader': 'WCP Club', 
  27             'uploader_id': 'wcp-club', 
  28             'upload_date': '20131031', 
  33     def _real_extract(self
, url
): 
  34         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  36         video_id 
= mobj
.group('id') 
  37         webpage_url 
= 'http://www.4tube.com/videos/' + video_id
 
  38         webpage 
= self
._download
_webpage
(webpage_url
, video_id
) 
  40         self
.report_extraction(video_id
) 
  42         playlist_json 
= self
._html
_search
_regex
(r
'var playerConfigPlaylist\s+=\s+([^;]+)', webpage
, 'Playlist') 
  43         media_id 
= self
._search
_regex
(r
'idMedia:\s*(\d+)', playlist_json
, 'Media Id') 
  44         sources 
= self
._search
_regex
(r
'sources:\s*\[([^\]]*)\]', playlist_json
, 'Sources').split(',') 
  45         title 
= self
._search
_regex
(r
'title:\s*"([^"]*)', playlist_json
, 'Title') 
  46         thumbnail_url 
= self
._search
_regex
(r
'image:\s*"([^"]*)', playlist_json
, 'Thumbnail', fatal
=False) 
  48         uploader_str 
= self
._search
_regex
(r
'<span>Uploaded by</span>(.*?)<span>', webpage
, 'uploader', fatal
=False) 
  49         mobj 
= re
.search(r
'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str
) 
  50         (uploader
, uploader_id
) = (mobj
.group('name'), mobj
.group('id')) if mobj 
else (clean_html(uploader_str
), None) 
  55         description 
= self
._html
_search
_meta
('description', webpage
, 'description') 
  57             upload_date 
= self
._search
_regex
(r
'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description
, 'upload date', 
  60                 upload_date 
= unified_strdate(upload_date
) 
  61             view_count 
= self
._search
_regex
(r
'Views: ([\d,\.]+)', description
, 'view count', fatal
=False) 
  63                 view_count 
= str_to_int(view_count
) 
  64             duration 
= parse_duration(self
._search
_regex
(r
'Length: (\d+m\d+s)', description
, 'duration', fatal
=False)) 
  66         token_url 
= "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id
, "+".join(sources
)) 
  68                 b
'Content-Type': b
'application/x-www-form-urlencoded', 
  69                 b
'Origin': b
'http://www.4tube.com', 
  71         token_req 
= compat_urllib_request
.Request(token_url
, b
'{}', headers
) 
  72         tokens 
= self
._download
_json
(token_req
, video_id
) 
  75             'url': tokens
[format
]['token'], 
  76             'format_id': format 
+ 'p', 
  77             'resolution': format 
+ 'p', 
  78             'quality': int(format
), 
  79             } for format 
in sources
] 
  81         self
._sort
_formats
(formats
) 
  87             'thumbnail': thumbnail_url
, 
  89             'uploader_id': uploader_id
, 
  90             'upload_date': upload_date
, 
  91             'view_count': view_count
, 
  94             'webpage_url': webpage_url
,