]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vice.py
b8b8bf97968ea430a445c521622461736346af7b
2 from __future__
import unicode_literals
9 from . adobepass
import AdobePassIE
10 from . youtube
import YoutubeIE
11 from . common
import InfoExtractor
12 from .. compat
import compat_HTTPError
23 class ViceBaseIE ( AdobePassIE
):
24 def _extract_preplay_video ( self
, url
, locale
, webpage
):
25 watch_hub_data
= extract_attributes ( self
._ search
_ regex
(
26 r
'(?s)(<watch-hub\s*.+?</watch-hub>)' , webpage
, 'watch hub' ))
27 video_id
= watch_hub_data
[ 'vms-id' ]
28 title
= watch_hub_data
[ 'video-title' ]
31 is_locked
= watch_hub_data
. get ( 'video-locked' ) == '1'
33 resource
= self
._ get
_ mvpd
_ resource
(
34 'VICELAND' , title
, video_id
,
35 watch_hub_data
. get ( 'video-rating' ))
36 query
[ 'tvetoken' ] = self
._ extract
_ mvpd
_ auth
(
37 url
, video_id
, 'VICELAND' , resource
)
39 # signature generation algorithm is reverse engineered from signatureGenerator in
40 # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
41 # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
42 exp
= int ( time
. time ()) + 14400
45 'sign' : hashlib
. sha512 (( ' %s :GET: %d ' % ( video_id
, exp
)). encode ()). hexdigest (),
49 host
= 'www.viceland' if is_locked
else self
._ PREPLAY
_ HOST
50 preplay
= self
._ download
_ json
(
51 'https:// %s .com/ %s /preplay/ %s ' % ( host
, locale
, video_id
),
52 video_id
, query
= query
)
53 except ExtractorError
as e
:
54 if isinstance ( e
. cause
, compat_HTTPError
) and e
. cause
. code
== 400 :
55 error
= json
. loads ( e
. cause
. read (). decode ())
56 raise ExtractorError ( ' %s said: %s ' % (
57 self
. IE_NAME
, error
[ 'details' ]), expected
= True )
60 video_data
= preplay
[ 'video' ]
61 base
= video_data
[ 'base' ]
62 uplynk_preplay_url
= preplay
[ 'preplayURL' ]
63 episode
= video_data
. get ( 'episode' , {})
64 channel
= video_data
. get ( 'channel' , {})
67 cc_url
= preplay
. get ( 'ccURL' )
74 '_type' : 'url_transparent' ,
75 'url' : uplynk_preplay_url
,
78 'description' : base
. get ( 'body' ) or base
. get ( 'display_body' ),
79 'thumbnail' : watch_hub_data
. get ( 'cover-image' ) or watch_hub_data
. get ( 'thumbnail' ),
80 'duration' : int_or_none ( video_data
. get ( 'video_duration' )) or parse_duration ( watch_hub_data
. get ( 'video-duration' )),
81 'timestamp' : int_or_none ( video_data
. get ( 'created_at' ), 1000 ),
82 'age_limit' : parse_age_limit ( video_data
. get ( 'video_rating' )),
83 'series' : video_data
. get ( 'show_title' ) or watch_hub_data
. get ( 'show-title' ),
84 'episode_number' : int_or_none ( episode
. get ( 'episode_number' ) or watch_hub_data
. get ( 'episode' )),
85 'episode_id' : str_or_none ( episode
. get ( 'id' ) or video_data
. get ( 'episode_id' )),
86 'season_number' : int_or_none ( watch_hub_data
. get ( 'season' )),
87 'season_id' : str_or_none ( episode
. get ( 'season_id' )),
88 'uploader' : channel
. get ( 'base' , {}). get ( 'title' ) or watch_hub_data
. get ( 'channel-title' ),
89 'uploader_id' : str_or_none ( channel
. get ( 'id' )),
90 'subtitles' : subtitles
,
91 'ie_key' : 'UplynkPreplay' ,
95 class ViceIE ( ViceBaseIE
):
97 _VALID_URL
= r
'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
100 'url' : 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab' ,
101 'md5' : '7d3ae2f9ba5f196cdd9f9efd43657ac2' ,
103 'id' : 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj' ,
105 'title' : 'Monkey Labs of Holland' ,
106 'description' : 'md5:92b3c7dcbfe477f772dd4afa496c9149' ,
108 'add_ie' : [ 'Ooyala' ],
110 'url' : 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56' ,
112 'id' : '5816510690b70e6c5fd39a56' ,
114 'uploader' : 'Waypoint' ,
115 'title' : 'The Signal From Tölva' ,
116 'description' : 'md5:3927e3c79f9e8094606a2b3c5b5e55d5' ,
117 'uploader_id' : '57f7d621e05ca860fa9ccaf9' ,
118 'timestamp' : 1477941983 ,
119 'upload_date' : '20161031' ,
123 'skip_download' : True ,
125 'add_ie' : [ 'UplynkPreplay' ],
127 'url' : 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f' ,
129 'id' : '581b12b60a0e1f4c0fb6ea2f' ,
131 'title' : 'ULFs - Wien berüchtigste Grafitti Crew - Part 1' ,
132 'description' : '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>' ,
134 'uploader_id' : '57a204088cb727dec794c67b' ,
135 'timestamp' : 1485368119 ,
136 'upload_date' : '20170125' ,
141 'skip_download' : True ,
143 'add_ie' : [ 'UplynkPreplay' ],
145 'url' : 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4' ,
146 'only_matching' : True ,
148 _PREPLAY_HOST
= 'video.vice'
150 def _real_extract ( self
, url
):
151 locale
, video_id
= re
. match ( self
._ VALID
_U RL
, url
). groups ()
152 webpage
, urlh
= self
._ download
_ webpage
_ handle
( url
, video_id
)
153 embed_code
= self
._ search
_ regex
(
154 r
'embedCode=([^&\' "]+)', webpage,
155 'ooyala embed code', default=None)
157 return self.url_result('ooyala: %s ' % embed_code, 'Ooyala')
158 youtube_id = self._search_regex(
159 r'data-youtube-id=" ([ ^
"]+)" ', webpage, ' youtube
id ', default=None)
161 return self.url_result(youtube_id, ' Youtube
')
162 return self._extract_preplay_video(urlh.geturl(), locale, webpage)
165 class ViceShowIE(InfoExtractor):
166 IE_NAME = ' vice
: show
'
167 _VALID_URL = r' https?
://( ?
:.+ ?\
.) ?vice\
. com
/( ?
:[ ^
/]+/) ?show
/( ?P
< id >[ ^
/ ?
#&]+)'
170 'url' : 'https://munchies.vice.com/en/show/fuck-thats-delicious-2' ,
172 'id' : 'fuck-thats-delicious-2' ,
173 'title' : "Fuck, That's Delicious" ,
174 'description' : 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.' ,
176 'playlist_count' : 17 ,
179 def _real_extract ( self
, url
):
180 show_id
= self
._ match
_ id
( url
)
181 webpage
= self
._ download
_ webpage
( url
, show_id
)
184 self
. url_result ( video_url
, ViceIE
. ie_key ())
185 for video_url
, _
in re
. findall (
186 r
'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="( %s .*?)"'
187 % ViceIE
._ VALID
_U RL
, webpage
)]
189 title
= self
._ search
_ regex
(
190 r
'<title>(.+?)</title>' , webpage
, 'title' , default
= None )
192 title
= re
. sub ( r
'(.+)\s*\|\s*.+$' , r
'\1' , title
). strip ()
193 description
= self
._ html
_ search
_ meta
(
194 'description' , webpage
, 'description' )
196 return self
. playlist_result ( entries
, show_id
, title
, description
)
199 class ViceArticleIE ( InfoExtractor
):
200 IE_NAME
= 'vice:article'
201 _VALID_URL
= r
'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
204 'url' : 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah' ,
206 'id' : '58dc0a3dee202d2a0ccfcbd8' ,
208 'title' : 'Mormon War on Porn ' ,
209 'description' : 'md5:ad396a2481e7f8afb5ed486878421090' ,
211 'uploader_id' : '57a204088cb727dec794c693' ,
212 'timestamp' : 1489160690 ,
213 'upload_date' : '20170310' ,
217 'skip_download' : True ,
219 'add_ie' : [ 'UplynkPreplay' ],
221 'url' : 'https://www.vice.com/en_us/article/how-to-hack-a-car' ,
222 'md5' : 'a7ecf64ee4fa19b916c16f4b56184ae2' ,
226 'title' : 'How to Hack a Car: Phreaked Out (Episode 2)' ,
227 'description' : 'md5:ee95453f7ff495db8efe14ae8bf56f30' ,
228 'uploader_id' : 'MotherboardTV' ,
229 'uploader' : 'Motherboard' ,
230 'upload_date' : '20140529' ,
232 'add_ie' : [ 'Youtube' ],
234 'url' : 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1' ,
235 'only_matching' : True ,
237 'url' : 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229' ,
238 'only_matching' : True ,
241 def _real_extract ( self
, url
):
242 display_id
= self
._ match
_ id
( url
)
244 webpage
= self
._ download
_ webpage
( url
, display_id
)
246 prefetch_data
= self
._ parse
_ json
( self
._ search
_ regex
(
247 r
'window\.__PREFETCH_DATA\s*=\s*({.*});' ,
248 webpage
, 'prefetch data' ), display_id
)
249 body
= prefetch_data
[ 'body' ]
251 def _url_res ( video_url
, ie_key
):
253 '_type' : 'url_transparent' ,
255 'display_id' : display_id
,
259 embed_code
= self
._ search
_ regex
(
260 r
'embedCode=([^&\' "]+)', body,
261 'ooyala embed code', default=None)
263 return _url_res('ooyala: %s ' % embed_code, 'Ooyala')
265 youtube_url = YoutubeIE._extract_url(body)
267 return _url_res(youtube_url, YoutubeIE.ie_key())
269 video_url = self._html_search_regex(
270 r'data-video-url=" ([ ^
"]+)" ',
271 prefetch_data[' embed_code
'], ' video URL
')
273 return _url_res(video_url, ViceIE.ie_key())