]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viki.py
1 from __future__
import unicode_literals
17 from . common
import InfoExtractor
20 class VikiIE ( InfoExtractor
):
24 _USER_AGENT
= 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
26 _VALID_URL
= r
'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
28 'url' : 'http://www.viki.com/videos/1023585v-heirs-episode-14' ,
32 'title' : 'Heirs Episode 14' ,
34 'description' : 'md5:c4b17b9626dd4b143dcc4d855ba3474e' ,
35 'upload_date' : '20131121' ,
38 'skip' : 'Blocked in the US' ,
40 'url' : 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference' ,
41 'md5' : 'ca6493e6f0a6ec07da9aa8d6304b4b2c' ,
45 'description' : 'md5:d70b2f9428f5488321bfe1db10d612ea' ,
46 'upload_date' : '20150430' ,
47 'title' : ' \' The Avengers: Age of Ultron \' Press Conference' ,
50 'url' : 'http://www.viki.com/videos/1048879v-ankhon-dekhi' ,
54 'upload_date' : '20140820' ,
55 'description' : 'md5:54ff56d51bdfc7a30441ec967394e91c' ,
56 'title' : 'Ankhon Dekhi' ,
60 'skip_download' : True ,
64 def _real_extract ( self
, url
):
65 video_id
= self
._ match
_ id
( url
)
67 webpage
= self
._ download
_ webpage
( url
, video_id
)
68 title
= self
._ og
_ search
_ title
( webpage
)
69 description
= self
._ og
_ search
_ description
( webpage
)
70 thumbnail
= self
._ og
_ search
_ thumbnail
( webpage
)
72 uploader_m
= re
. search (
73 r
'<strong>Broadcast Network: </strong>\s*([^<]*)<' , webpage
)
74 if uploader_m
is None :
77 uploader
= uploader_m
. group ( 1 ). strip ()
79 rating_str
= self
._ html
_ search
_ regex
(
80 r
'<strong>Rating: </strong>\s*([^<]*)<' , webpage
,
81 'rating information' , default
= '' ). strip ()
82 age_limit
= US_RATINGS
. get ( rating_str
)
84 req
= compat_urllib_request
. Request (
85 'http://www.viki.com/player5_fragment/ %s ?action=show&controller=videos' % video_id
)
86 req
. add_header ( 'User-Agent' , self
._U SER
_ AGENT
)
87 info_webpage
= self
._ download
_ webpage
(
88 req
, video_id
, note
= 'Downloading info page' )
89 err_msg
= self
._ html
_ search
_ regex
( r
'<div[^>]+class="video-error[^>]+>(.+)</div>' , info_webpage
, 'error message' , default
= None )
91 if 'not available in your region' in err_msg
:
93 'Video %s is blocked from your location.' % video_id
,
96 raise ExtractorError ( 'Viki said: ' + err_msg
)
98 r
'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"' , info_webpage
)
100 raise ExtractorError ( 'Unable to find video URL' )
101 video_url
= unescapeHTML ( mobj
. group ( 'url' ))
102 video_ext
= mimetype2ext ( mobj
. group ( 'mime_type' ))
104 if determine_ext ( video_url
) == 'm3u8' :
105 formats
= self
._ extract
_ m
3u8_ formats
(
106 video_url
, video_id
, ext
= video_ext
)
113 upload_date_str
= self
._ html
_ search
_ regex
(
114 r
'"created_at":"([^"]+)"' , info_webpage
, 'upload date' )
116 unified_strdate ( upload_date_str
)
117 if upload_date_str
is not None
122 video_subtitles
= self
. extract_subtitles ( video_id
, info_webpage
)
128 'description' : description
,
129 'thumbnail' : thumbnail
,
130 'age_limit' : age_limit
,
131 'uploader' : uploader
,
132 'subtitles' : video_subtitles
,
133 'upload_date' : upload_date
,
136 def _get_subtitles ( self
, video_id
, info_webpage
):
138 for sturl_html
in re
. findall ( r
'<track src="([^"]+)"' , info_webpage
):
139 sturl
= unescapeHTML ( sturl_html
)
140 m
= re
. search ( r
'/(?P<lang>[a-z]+)\.vtt' , sturl
)
143 res
[ m
. group ( 'lang' )] = [{
144 'url' : compat_urlparse
. urljoin ( 'http://www.viki.com' , sturl
),