X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/47d80ec0b18245caeb97018d4c1af18d0b5b972b..f6e6ebc1916f365c396a262787a63523463c8099:/youtube_dl/extractor/vice.py diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 8a00c8f..54e207b 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -20,7 +20,7 @@ from ..utils import ( class ViceBaseIE(AdobePassIE): - def _extract_preplay_video(self, url, webpage): + def _extract_preplay_video(self, url, locale, webpage): watch_hub_data = extract_attributes(self._search_regex( r'(?s)()', webpage, 'watch hub')) video_id = watch_hub_data['vms-id'] @@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE): resource = self._get_mvpd_resource( 'VICELAND', title, video_id, watch_hub_data.get('video-rating')) - query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + query['tvetoken'] = self._extract_mvpd_auth( + url, video_id, 'VICELAND', resource) # signature generation algorithm is reverse engineered from signatureGenerator in # webpack:///../shared/~/vice-player/dist/js/vice-player.js in @@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE): try: host = 'www.viceland' if is_locked else self._PREPLAY_HOST - preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query) + preplay = self._download_json( + 'https://%s.com/%s/preplay/%s' % (host, locale, video_id), + video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: error = json.loads(e.cause.read().decode()) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error['details']), expected=True) raise video_data = preplay['video'] @@ -70,10 +74,10 @@ class ViceBaseIE(AdobePassIE): 'url': uplynk_preplay_url, 'id': video_id, 'title': title, - 'description': base.get('body'), + 'description': base.get('body') or base.get('display_body'), 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), - 'timestamp': int_or_none(video_data.get('created_at')), + 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating')), 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), @@ -88,41 +92,30 @@ class ViceBaseIE(AdobePassIE): class ViceIE(ViceBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P[^/?#&]+)' + IE_NAME = 'vice' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P[^/]+)/)?videos?/(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', - 'md5': 'e9d77741f9e42ba583e683cd170660f7', + 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', + 'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2', 'info_dict': { - 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', + 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj', 'ext': 'flv', - 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', - 'duration': 725.983, + 'title': 'Monkey Labs of Holland', + 'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149', }, 'add_ie': ['Ooyala'], - }, { - 'url': 'http://www.vice.com/video/how-to-hack-a-car', - 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', - 'info_dict': { - 'id': '3jstaBeXgAs', - 'ext': 'mp4', - 'title': 'How to Hack a Car: Phreaked Out (Episode 2)', - 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30', - 'uploader_id': 'MotherboardTV', - 'uploader': 'Motherboard', - 'upload_date': '20140529', - }, - 'add_ie': ['Youtube'], }, { 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', - 'md5': '', 'info_dict': { 'id': '5816510690b70e6c5fd39a56', 'ext': 'mp4', 'uploader': 'Waypoint', 'title': 'The Signal From Tölva', + 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', 'uploader_id': '57f7d621e05ca860fa9ccaf9', - 'timestamp': 1477941983938, + 'timestamp': 1477941983, + 'upload_date': '20161031', }, 'params': { # m3u8 download @@ -130,19 +123,31 @@ class ViceIE(ViceBaseIE): }, 'add_ie': ['UplynkPreplay'], }, { - 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', - 'only_matching': True, - }, { - 'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229', - 'only_matching': True, + 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', + 'info_dict': { + 'id': '581b12b60a0e1f4c0fb6ea2f', + 'ext': 'mp4', + 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', + 'description': '

Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.

', + 'uploader': 'VICE', + 'uploader_id': '57a204088cb727dec794c67b', + 'timestamp': 1485368119, + 'upload_date': '20170125', + 'age_limit': 14, + }, + 'params': { + # AES-encrypted m3u8 + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay'], }, { - 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', + 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'only_matching': True, }] _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): - video_id = self._match_id(url) + locale, video_id = re.match(self._VALID_URL, url).groups() webpage, urlh = self._download_webpage_handle(url, video_id) embed_code = self._search_regex( r'embedCode=([^&\'"]+)', webpage, @@ -153,10 +158,11 @@ class ViceIE(ViceBaseIE): r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None) if youtube_id: return self.url_result(youtube_id, 'Youtube') - return self._extract_preplay_video(urlh.geturl(), webpage) + return self._extract_preplay_video(urlh.geturl(), locale, webpage) class ViceShowIE(InfoExtractor): + IE_NAME = 'vice:show' _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P[^/?#&]+)' _TEST = { @@ -183,6 +189,86 @@ class ViceShowIE(InfoExtractor): r'(.+?)', webpage, 'title', default=None) if title: title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip() - description = self._html_search_meta('description', webpage, 'description') + description = self._html_search_meta( + 'description', webpage, 'description') return self.playlist_result(entries, show_id, title, description) + + +class ViceArticleIE(InfoExtractor): + IE_NAME = 'vice:article' + _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' + + _TESTS = [{ + 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', + 'info_dict': { + 'id': '58dc0a3dee202d2a0ccfcbd8', + 'ext': 'mp4', + 'title': 'Mormon War on Porn ', + 'description': 'md5:ad396a2481e7f8afb5ed486878421090', + 'uploader': 'VICE', + 'uploader_id': '57a204088cb727dec794c693', + 'timestamp': 1489160690, + 'upload_date': '20170310', + }, + 'params': { + # AES-encrypted m3u8 + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay'], + }, { + 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', + 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', + 'info_dict': { + 'id': '3jstaBeXgAs', + 'ext': 'mp4', + 'title': 'How to Hack a Car: Phreaked Out (Episode 2)', + 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30', + 'uploader_id': 'MotherboardTV', + 'uploader': 'Motherboard', + 'upload_date': '20140529', + }, + 'add_ie': ['Youtube'], + }, { + 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1', + 'only_matching': True, + }, { + 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + prefetch_data = self._parse_json(self._search_regex( + r'window\.__PREFETCH_DATA\s*=\s*({.*});', + webpage, 'prefetch data'), display_id) + body = prefetch_data['body'] + + def _url_res(video_url, ie_key): + return { + '_type': 'url_transparent', + 'url': video_url, + 'display_id': display_id, + 'ie_key': ie_key, + } + + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', body, + 'ooyala embed code', default=None) + if embed_code: + return _url_res('ooyala:%s' % embed_code, 'Ooyala') + + youtube_url = self._html_search_regex( + r']+src="(.*youtube\.com/.*)"', + body, 'YouTube URL', default=None) + if youtube_url: + return _url_res(youtube_url, 'Youtube') + + video_url = self._html_search_regex( + r'data-video-url="([^"]+)"', + prefetch_data['embed_code'], 'video URL') + + return _url_res(video_url, ViceIE.ie_key())