]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/fourtube.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
15 class FourTubeIE(InfoExtractor
):
17 _VALID_URL
= r
'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'
20 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
21 'md5': '6516c8ac63b03de06bc8eac14362db4f',
25 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
26 'uploader': 'WCP Club',
27 'uploader_id': 'wcp-club',
28 'upload_date': '20131031',
33 def _real_extract(self
, url
):
34 mobj
= re
.match(self
._VALID
_URL
, url
)
36 video_id
= mobj
.group('id')
37 webpage_url
= 'http://www.4tube.com/videos/' + video_id
38 webpage
= self
._download
_webpage
(webpage_url
, video_id
)
40 self
.report_extraction(video_id
)
42 playlist_json
= self
._html
_search
_regex
(r
'var playerConfigPlaylist\s+=\s+([^;]+)', webpage
, 'Playlist')
43 media_id
= self
._search
_regex
(r
'idMedia:\s*(\d+)', playlist_json
, 'Media Id')
44 sources
= self
._search
_regex
(r
'sources:\s*\[([^\]]*)\]', playlist_json
, 'Sources').split(',')
45 title
= self
._search
_regex
(r
'title:\s*"([^"]*)', playlist_json
, 'Title')
46 thumbnail_url
= self
._search
_regex
(r
'image:\s*"([^"]*)', playlist_json
, 'Thumbnail', fatal
=False)
48 uploader_str
= self
._search
_regex
(r
'<span>Uploaded by</span>(.*?)<span>', webpage
, 'uploader', fatal
=False)
49 mobj
= re
.search(r
'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str
)
50 (uploader
, uploader_id
) = (mobj
.group('name'), mobj
.group('id')) if mobj
else (clean_html(uploader_str
), None)
55 description
= self
._html
_search
_meta
('description', webpage
, 'description')
57 upload_date
= self
._search
_regex
(r
'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description
, 'upload date',
60 upload_date
= unified_strdate(upload_date
)
61 view_count
= self
._search
_regex
(r
'Views: ([\d,\.]+)', description
, 'view count', fatal
=False)
63 view_count
= str_to_int(view_count
)
64 duration
= parse_duration(self
._search
_regex
(r
'Length: (\d+m\d+s)', description
, 'duration', fatal
=False))
66 token_url
= "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id
, "+".join(sources
))
68 b
'Content-Type': b
'application/x-www-form-urlencoded',
69 b
'Origin': b
'http://www.4tube.com',
71 token_req
= compat_urllib_request
.Request(token_url
, b
'{}', headers
)
72 tokens
= self
._download
_json
(token_req
, video_id
)
75 'url': tokens
[format
]['token'],
76 'format_id': format
+ 'p',
77 'resolution': format
+ 'p',
78 'quality': int(format
),
79 } for format
in sources
]
81 self
._sort
_formats
(formats
)
87 'thumbnail': thumbnail_url
,
89 'uploader_id': uploader_id
,
90 'upload_date': upload_date
,
91 'view_count': view_count
,
94 'webpage_url': webpage_url
,