]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vice.py
2 from __future__
import unicode_literals
9 from . adobepass
import AdobePassIE
10 from . common
import InfoExtractor
11 from .. compat
import compat_HTTPError
22 class ViceBaseIE ( AdobePassIE
):
23 def _extract_preplay_video ( self
, url
, locale
, webpage
):
24 watch_hub_data
= extract_attributes ( self
._ search
_ regex
(
25 r
'(?s)(<watch-hub\s*.+?</watch-hub>)' , webpage
, 'watch hub' ))
26 video_id
= watch_hub_data
[ 'vms-id' ]
27 title
= watch_hub_data
[ 'video-title' ]
30 is_locked
= watch_hub_data
. get ( 'video-locked' ) == '1'
32 resource
= self
._ get
_ mvpd
_ resource
(
33 'VICELAND' , title
, video_id
,
34 watch_hub_data
. get ( 'video-rating' ))
35 query
[ 'tvetoken' ] = self
._ extract
_ mvpd
_ auth
(
36 url
, video_id
, 'VICELAND' , resource
)
38 # signature generation algorithm is reverse engineered from signatureGenerator in
39 # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
40 # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
41 exp
= int ( time
. time ()) + 14400
44 'sign' : hashlib
. sha512 (( ' %s :GET: %d ' % ( video_id
, exp
)). encode ()). hexdigest (),
48 host
= 'www.viceland' if is_locked
else self
._ PREPLAY
_ HOST
49 preplay
= self
._ download
_ json
(
50 'https:// %s .com/ %s /preplay/ %s ' % ( host
, locale
, video_id
),
51 video_id
, query
= query
)
52 except ExtractorError
as e
:
53 if isinstance ( e
. cause
, compat_HTTPError
) and e
. cause
. code
== 400 :
54 error
= json
. loads ( e
. cause
. read (). decode ())
55 raise ExtractorError ( ' %s said: %s ' % (
56 self
. IE_NAME
, error
[ 'details' ]), expected
= True )
59 video_data
= preplay
[ 'video' ]
60 base
= video_data
[ 'base' ]
61 uplynk_preplay_url
= preplay
[ 'preplayURL' ]
62 episode
= video_data
. get ( 'episode' , {})
63 channel
= video_data
. get ( 'channel' , {})
66 cc_url
= preplay
. get ( 'ccURL' )
73 '_type' : 'url_transparent' ,
74 'url' : uplynk_preplay_url
,
77 'description' : base
. get ( 'body' ) or base
. get ( 'display_body' ),
78 'thumbnail' : watch_hub_data
. get ( 'cover-image' ) or watch_hub_data
. get ( 'thumbnail' ),
79 'duration' : int_or_none ( video_data
. get ( 'video_duration' )) or parse_duration ( watch_hub_data
. get ( 'video-duration' )),
80 'timestamp' : int_or_none ( video_data
. get ( 'created_at' ), 1000 ),
81 'age_limit' : parse_age_limit ( video_data
. get ( 'video_rating' )),
82 'series' : video_data
. get ( 'show_title' ) or watch_hub_data
. get ( 'show-title' ),
83 'episode_number' : int_or_none ( episode
. get ( 'episode_number' ) or watch_hub_data
. get ( 'episode' )),
84 'episode_id' : str_or_none ( episode
. get ( 'id' ) or video_data
. get ( 'episode_id' )),
85 'season_number' : int_or_none ( watch_hub_data
. get ( 'season' )),
86 'season_id' : str_or_none ( episode
. get ( 'season_id' )),
87 'uploader' : channel
. get ( 'base' , {}). get ( 'title' ) or watch_hub_data
. get ( 'channel-title' ),
88 'uploader_id' : str_or_none ( channel
. get ( 'id' )),
89 'subtitles' : subtitles
,
90 'ie_key' : 'UplynkPreplay' ,
94 class ViceIE ( ViceBaseIE
):
96 _VALID_URL
= r
'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
99 'url' : 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab' ,
100 'md5' : '7d3ae2f9ba5f196cdd9f9efd43657ac2' ,
102 'id' : 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj' ,
104 'title' : 'Monkey Labs of Holland' ,
105 'description' : 'md5:92b3c7dcbfe477f772dd4afa496c9149' ,
107 'add_ie' : [ 'Ooyala' ],
109 'url' : 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56' ,
111 'id' : '5816510690b70e6c5fd39a56' ,
113 'uploader' : 'Waypoint' ,
114 'title' : 'The Signal From Tölva' ,
115 'description' : 'md5:3927e3c79f9e8094606a2b3c5b5e55d5' ,
116 'uploader_id' : '57f7d621e05ca860fa9ccaf9' ,
117 'timestamp' : 1477941983 ,
118 'upload_date' : '20161031' ,
122 'skip_download' : True ,
124 'add_ie' : [ 'UplynkPreplay' ],
126 'url' : 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f' ,
128 'id' : '581b12b60a0e1f4c0fb6ea2f' ,
130 'title' : 'ULFs - Wien berüchtigste Grafitti Crew - Part 1' ,
131 'description' : '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>' ,
133 'uploader_id' : '57a204088cb727dec794c67b' ,
134 'timestamp' : 1485368119 ,
135 'upload_date' : '20170125' ,
140 'skip_download' : True ,
142 'add_ie' : [ 'UplynkPreplay' ],
144 'url' : 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4' ,
145 'only_matching' : True ,
147 _PREPLAY_HOST
= 'video.vice'
149 def _real_extract ( self
, url
):
150 locale
, video_id
= re
. match ( self
._ VALID
_U RL
, url
). groups ()
151 webpage
, urlh
= self
._ download
_ webpage
_ handle
( url
, video_id
)
152 embed_code
= self
._ search
_ regex
(
153 r
'embedCode=([^&\' "]+)', webpage,
154 'ooyala embed code', default=None)
156 return self.url_result('ooyala: %s ' % embed_code, 'Ooyala')
157 youtube_id = self._search_regex(
158 r'data-youtube-id=" ([ ^
"]+)" ', webpage, ' youtube
id ', default=None)
160 return self.url_result(youtube_id, ' Youtube
')
161 return self._extract_preplay_video(urlh.geturl(), locale, webpage)
164 class ViceShowIE(InfoExtractor):
165 IE_NAME = ' vice
: show
'
166 _VALID_URL = r' https?
://( ?
:.+ ?\
.) ?vice\
. com
/( ?
:[ ^
/]+/) ?show
/( ?P
< id >[ ^
/ ?
#&]+)'
169 'url' : 'https://munchies.vice.com/en/show/fuck-thats-delicious-2' ,
171 'id' : 'fuck-thats-delicious-2' ,
172 'title' : "Fuck, That's Delicious" ,
173 'description' : 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.' ,
175 'playlist_count' : 17 ,
178 def _real_extract ( self
, url
):
179 show_id
= self
._ match
_ id
( url
)
180 webpage
= self
._ download
_ webpage
( url
, show_id
)
183 self
. url_result ( video_url
, ViceIE
. ie_key ())
184 for video_url
, _
in re
. findall (
185 r
'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="( %s .*?)"'
186 % ViceIE
._ VALID
_U RL
, webpage
)]
188 title
= self
._ search
_ regex
(
189 r
'<title>(.+?)</title>' , webpage
, 'title' , default
= None )
191 title
= re
. sub ( r
'(.+)\s*\|\s*.+$' , r
'\1' , title
). strip ()
192 description
= self
._ html
_ search
_ meta
(
193 'description' , webpage
, 'description' )
195 return self
. playlist_result ( entries
, show_id
, title
, description
)
198 class ViceArticleIE ( InfoExtractor
):
199 IE_NAME
= 'vice:article'
200 _VALID_URL
= r
'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
203 'url' : 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah' ,
205 'id' : '58dc0a3dee202d2a0ccfcbd8' ,
207 'title' : 'Mormon War on Porn ' ,
208 'description' : 'md5:ad396a2481e7f8afb5ed486878421090' ,
210 'uploader_id' : '57a204088cb727dec794c693' ,
211 'timestamp' : 1489160690 ,
212 'upload_date' : '20170310' ,
216 'skip_download' : True ,
218 'add_ie' : [ 'UplynkPreplay' ],
220 'url' : 'https://www.vice.com/en_us/article/how-to-hack-a-car' ,
221 'md5' : 'a7ecf64ee4fa19b916c16f4b56184ae2' ,
225 'title' : 'How to Hack a Car: Phreaked Out (Episode 2)' ,
226 'description' : 'md5:ee95453f7ff495db8efe14ae8bf56f30' ,
227 'uploader_id' : 'MotherboardTV' ,
228 'uploader' : 'Motherboard' ,
229 'upload_date' : '20140529' ,
231 'add_ie' : [ 'Youtube' ],
233 'url' : 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1' ,
234 'only_matching' : True ,
236 'url' : 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229' ,
237 'only_matching' : True ,
240 def _real_extract ( self
, url
):
241 display_id
= self
._ match
_ id
( url
)
243 webpage
= self
._ download
_ webpage
( url
, display_id
)
245 prefetch_data
= self
._ parse
_ json
( self
._ search
_ regex
(
246 r
'window\.__PREFETCH_DATA\s*=\s*({.*});' ,
247 webpage
, 'prefetch data' ), display_id
)
248 body
= prefetch_data
[ 'body' ]
250 def _url_res ( video_url
, ie_key
):
252 '_type' : 'url_transparent' ,
254 'display_id' : display_id
,
258 embed_code
= self
._ search
_ regex
(
259 r
'embedCode=([^&\' "]+)', body,
260 'ooyala embed code', default=None)
262 return _url_res('ooyala: %s ' % embed_code, 'Ooyala')
264 youtube_url = self._html_search_regex(
265 r'<iframe[^>]+src=" (.* youtube\
. com
/.*) "',
266 body, 'YouTube URL', default=None)
268 return _url_res(youtube_url, 'Youtube')
270 video_url = self._html_search_regex(
271 r'data-video-url=" ([ ^
"]+)" ',
272 prefetch_data[' embed_code
'], ' video URL
')
274 return _url_res(video_url, ViceIE.ie_key())