]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/adultswim.py
d/control: Build-depend on flake8 and python3-nose.
[youtubedl] / youtube_dl / extractor / adultswim.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .turner import TurnerBaseIE
8 from ..utils import (
9 determine_ext,
10 float_or_none,
11 int_or_none,
12 mimetype2ext,
13 parse_age_limit,
14 parse_iso8601,
15 strip_or_none,
16 try_get,
17 )
18
19
class AdultSwimIE(TurnerBaseIE):
    """Extractor for adultswim.com video pages.

    ``/videos/<show>/<episode>`` URLs are extracted as a single video;
    ``/videos/<show>`` URLs are extracted as a playlist whose entries point
    back at the per-episode URLs handled by this same extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'

    _TESTS = [{
        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
        'info_dict': {
            'id': 'rQxZvXQ4ROaSOqq-or2Mow',
            'ext': 'mp4',
            'title': 'Rick and Morty - Pilot',
            'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
            'timestamp': 1543294800,
            'upload_date': '20181127',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
        'info_dict': {
            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
            'ext': 'mp4',
            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
            'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
            'upload_date': '20080124',
            'timestamp': 1201150800,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }, {
        'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
        'info_dict': {
            'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
            'ext': 'mp4',
            'title': 'Decker - Inside Decker: A New Hero',
            'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
            'timestamp': 1469480460,
            'upload_date': '20160725',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.adultswim.com/videos/attack-on-titan',
        'info_dict': {
            'id': 'attack-on-titan',
            'title': 'Attack on Titan',
            'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://www.adultswim.com/videos/streams/williams-stream',
        'info_dict': {
            'id': 'd8DEBj7QRfetLsRgFnGEyg',
            'ext': 'mp4',
            'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'description': 'original programming',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        """Extract a single episode or a whole-show playlist from *url*.

        The show (and, when present, the episode) slug is taken from the URL
        and looked up through the site's GraphQL search endpoint.

        Returns an info dict for an episode URL, or a playlist result whose
        entries are per-episode URLs for a show URL.
        """
        show_path, episode_path = re.match(self._VALID_URL, url).groups()
        display_id = episode_path or show_path

        # Build the inner selection first and interpolate everything into the
        # outer template in one step.  Formatting the outer template twice
        # would re-interpret any '%' contained in the URL slugs (e.g. '%20')
        # as a format directive and raise.
        if episode_path:
            show_fields = '''title
    getVideoBySlug(slug:"%s") {
      _id
      auth
      description
      duration
      episodeNumber
      launchDate
      mediaID
      seasonNumber
      poster
      title
      tvRating
    }''' % episode_path
        else:
            show_fields = '''metaDescription
    title
    videos(first:1000,sort:["episode_number"]) {
      edges {
        node {
           _id
           slug
        }
      }
    }'''
        query = '''query {
  getShowBySlug(slug:"%s") {
    %s
  }
}''' % (show_path, show_fields)

        show_data = self._download_json(
            'https://www.adultswim.com/api/search', display_id,
            data=json.dumps({'query': query}).encode(),
            headers={'Content-Type': 'application/json'})['data']['getShowBySlug']

        if episode_path:
            video_data = show_data['getVideoBySlug']
            video_id = video_data['_id']
            episode_title = title = video_data['title']
            series = show_data.get('title')
            if series:
                # Prefix the episode title with the show title when known.
                title = '%s - %s' % (series, title)
            info = {
                'id': video_id,
                'title': title,
                'description': strip_or_none(video_data.get('description')),
                'duration': float_or_none(video_data.get('duration')),
                'formats': [],
                'subtitles': {},
                'age_limit': parse_age_limit(video_data.get('tvRating')),
                'thumbnail': video_data.get('poster'),
                'timestamp': parse_iso8601(video_data.get('launchDate')),
                'series': series,
                'season_number': int_or_none(video_data.get('seasonNumber')),
                'episode': episode_title,
                'episode_number': int_or_none(video_data.get('episodeNumber')),
            }

            auth = video_data.get('auth')
            media_id = video_data.get('mediaID')
            if media_id:
                info.update(self._extract_ngtv_info(media_id, {
                    # CDN_TOKEN_APP_ID from:
                    # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
                    'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
                }, {
                    'url': url,
                    'site_name': 'AdultSwim',
                    'auth_required': auth,
                }))

            if not auth:
                # Videos that need no authentication also expose their stream
                # assets through the shows API; failures here are non-fatal.
                extract_data = self._download_json(
                    'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
                    video_id, query={'fields': 'stream'}, fatal=False) or {}
                assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
                for asset in assets:
                    asset_url = asset.get('url')
                    if not asset_url:
                        continue
                    ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
                    if ext == 'm3u8':
                        info['formats'].extend(self._extract_m3u8_formats(
                            asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    elif ext == 'f4m':
                        # HDS (f4m) manifests are deliberately not extracted.
                        continue
                    elif ext in ('scc', 'ttml', 'vtt'):
                        info['subtitles'].setdefault('en', []).append({
                            'url': asset_url,
                        })
            self._sort_formats(info['formats'])

            return info
        else:
            entries = []
            # try_get tolerates a missing or explicitly-null 'videos'/'edges'
            # value, mirroring how the stream assets are read above.
            edges = try_get(show_data, lambda x: x['videos']['edges'], list) or []
            for edge in edges:
                video = edge.get('node') or {}
                slug = video.get('slug')
                if not slug:
                    continue
                entries.append(self.url_result(
                    'http://adultswim.com/videos/%s/%s' % (show_path, slug),
                    'AdultSwim', video.get('_id')))
            return self.playlist_result(
                entries, show_path, show_data.get('title'),
                strip_or_none(show_data.get('metaDescription')))