X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/46113edab215c2211a604c06245c16d5d4e57dcf..9865587cf7b579793ba274067c3e44065d5ff77d:/youtube_dl/extractor/adultswim.py diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index b4b40f2..8d1d9ac 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -1,143 +1,202 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re -from .common import InfoExtractor - -class AdultSwimIE(InfoExtractor): - _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' - _TEST = { - 'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', - 'playlist': [ - { - 'md5': '4da359ec73b58df4575cd01a610ba5dc', - 'info_dict': { - 'id': '8a250ba1450996e901453d7f02ca02f5', - 'ext': 'flv', - 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1', - 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', - 'uploader': 'Rick and Morty', - 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' - } - }, - { - 'md5': 'ffbdf55af9331c509d95350bd0cc1819', - 'info_dict': { - 'id': '8a250ba1450996e901453d7f4bd102f6', - 'ext': 'flv', - 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2', - 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. 
You got any chips, broh?', - 'uploader': 'Rick and Morty', - 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' - } - }, - { - 'md5': 'b92409635540304280b4b6c36bd14a0a', - 'info_dict': { - 'id': '8a250ba1450996e901453d7fa73c02f7', - 'ext': 'flv', - 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3', - 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', - 'uploader': 'Rick and Morty', - 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' - } - }, - { - 'md5': 'e8818891d60e47b29cd89d7b0278156d', - 'info_dict': { - 'id': '8a250ba1450996e901453d7fc8ba02f8', - 'ext': 'flv', - 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4', - 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', - 'uploader': 'Rick and Morty', - 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' - } - } - ] - } - - _video_extensions = { - '3500': 'flv', - '640': 'mp4', - '150': 'mp4', - 'ipad': 'm3u8', - 'iphone': 'm3u8' - } - _video_dimensions = { - '3500': (1280, 720), - '640': (480, 270), - '150': (320, 180) - } +from .turner import TurnerBaseIE +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + mimetype2ext, + parse_age_limit, + parse_iso8601, + strip_or_none, + try_get, +) + + +class AdultSwimIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?' 
+ + _TESTS = [{ + 'url': 'http://adultswim.com/videos/rick-and-morty/pilot', + 'info_dict': { + 'id': 'rQxZvXQ4ROaSOqq-or2Mow', + 'ext': 'mp4', + 'title': 'Rick and Morty - Pilot', + 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', + 'timestamp': 1543294800, + 'upload_date': '20181127', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ', + 'ext': 'mp4', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.', + 'upload_date': '20080124', + 'timestamp': 1201150800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', + 'info_dict': { + 'id': 'I0LQFQkaSUaFp8PnAWHhoQ', + 'ext': 'mp4', + 'title': 'Decker - Inside Decker: A New Hero', + 'description': 'The guys recap the conclusion of the season. 
They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.', + 'timestamp': 1469480460, + 'upload_date': '20160725', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + 'url': 'http://www.adultswim.com/videos/attack-on-titan', + 'info_dict': { + 'id': 'attack-on-titan', + 'title': 'Attack on Titan', + 'description': 'md5:41caa9416906d90711e31dc00cb7db7e', + }, + 'playlist_mincount': 12, + }, { + 'url': 'http://www.adultswim.com/videos/streams/williams-stream', + 'info_dict': { + 'id': 'd8DEBj7QRfetLsRgFnGEyg', + 'ext': 'mp4', + 'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'description': 'original programming', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': '404 Not Found', + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_path = mobj.group('path') - - webpage = self._download_webpage(url, video_path) - episode_id = self._html_search_regex( - r'', - webpage, 'episode_id') - title = self._og_search_title(webpage) - - index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id - idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index') - - episode_el = idoc.find('.//episode') - show_title = episode_el.attrib.get('collectionTitle') - episode_title = episode_el.attrib.get('title') - thumbnail = episode_el.attrib.get('thumbnailUrl') - description = episode_el.find('./description').text.strip() - - entries = [] - segment_els = episode_el.findall('./segments/segment') - - for part_num, segment_el in enumerate(segment_els): - segment_id = segment_el.attrib.get('id') - segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1) - thumbnail = segment_el.attrib.get('thumbnailUrl') - duration = segment_el.attrib.get('duration') - - 
segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id - idoc = self._download_xml( - segment_url, segment_title, - 'Downloading segment information', 'Unable to download segment information') - - formats = [] - file_els = idoc.findall('.//files/file') - - for file_el in file_els: - bitrate = file_el.attrib.get('bitrate') - type = file_el.attrib.get('type') - width, height = self._video_dimensions.get(bitrate, (None, None)) - formats.append({ - 'format_id': '%s-%s' % (bitrate, type), - 'url': file_el.text, - 'ext': self._video_extensions.get(bitrate, 'mp4'), - # The bitrate may not be a number (for example: 'iphone') - 'tbr': int(bitrate) if bitrate.isdigit() else None, - 'height': height, - 'width': width - }) - - self._sort_formats(formats) - - entries.append({ - 'id': segment_id, - 'title': segment_title, - 'formats': formats, - 'uploader': show_title, - 'thumbnail': thumbnail, - 'duration': duration, - 'description': description - }) - - return { - '_type': 'playlist', - 'id': episode_id, - 'display_id': video_path, - 'entries': entries, - 'title': '%s %s' % (show_title, episode_title), - 'description': description, - 'thumbnail': thumbnail + show_path, episode_path = re.match(self._VALID_URL, url).groups() + display_id = episode_path or show_path + query = '''query { + getShowBySlug(slug:"%s") { + %%s + } +}''' % show_path + if episode_path: + query = query % '''title + getVideoBySlug(slug:"%s") { + _id + auth + description + duration + episodeNumber + launchDate + mediaID + seasonNumber + poster + title + tvRating + }''' % episode_path + ['getVideoBySlug'] + else: + query = query % '''metaDescription + title + videos(first:1000,sort:["episode_number"]) { + edges { + node { + _id + slug } + } + }''' + show_data = self._download_json( + 'https://www.adultswim.com/api/search', display_id, + data=json.dumps({'query': query}).encode(), + headers={'Content-Type': 
'application/json'})['data']['getShowBySlug'] + if episode_path: + video_data = show_data['getVideoBySlug'] + video_id = video_data['_id'] + episode_title = title = video_data['title'] + series = show_data.get('title') + if series: + title = '%s - %s' % (series, title) + info = { + 'id': video_id, + 'title': title, + 'description': strip_or_none(video_data.get('description')), + 'duration': float_or_none(video_data.get('duration')), + 'formats': [], + 'subtitles': {}, + 'age_limit': parse_age_limit(video_data.get('tvRating')), + 'thumbnail': video_data.get('poster'), + 'timestamp': parse_iso8601(video_data.get('launchDate')), + 'series': series, + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode': episode_title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + } + + auth = video_data.get('auth') + media_id = video_data.get('mediaID') + if media_id: + info.update(self._extract_ngtv_info(media_id, { + # CDN_TOKEN_APP_ID from: + # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js + 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': auth, + })) + + if not auth: + extract_data = self._download_json( + 'https://www.adultswim.com/api/shows/v1/videos/' + video_id, + video_id, query={'fields': 'stream'}, fatal=False) or {} + assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or [] + for asset in assets: + asset_url = asset.get('url') + if not asset_url: + continue + ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) + if ext == 'm3u8': + info['formats'].extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + continue + # 
info['formats'].extend(self._extract_f4m_formats( + # asset_url, video_id, f4m_id='hds', fatal=False)) + elif ext in ('scc', 'ttml', 'vtt'): + info['subtitles'].setdefault('en', []).append({ + 'url': asset_url, + }) + self._sort_formats(info['formats']) + + return info + else: + entries = [] + for edge in show_data.get('videos', {}).get('edges', []): + video = edge.get('node') or {} + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('_id'))) + return self.playlist_result( + entries, show_path, show_data.get('title'), + strip_or_none(show_data.get('metaDescription')))