]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/funimation.py
Update upstream source from tag 'upstream/2020.03.24'
[youtubedl] / youtube_dl / extractor / funimation.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import random
5 import string
6
7 from .common import InfoExtractor
8 from ..compat import compat_HTTPError
9 from ..utils import (
10 determine_ext,
11 int_or_none,
12 js_to_json,
13 ExtractorError,
14 urlencode_postdata
15 )
16
17
class FunimationIE(InfoExtractor):
    """Extractor for episode pages on funimation.com and funimationnow.uk.

    The episode page is scraped for metadata (the ``TITLE_DATA`` and
    ``KANE_customdimensions`` JavaScript variables plus meta tags); the
    media sources are then requested from the ``showexperience`` API.
    When ``_get_login_info()`` yields credentials, ``_real_initialize``
    logs in and the resulting token is sent with the API request.
    """

    # The last path component of a /shows/<show>/<episode> URL is captured
    # as ``id`` and used as the display id.
    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'

    _NETRC_MACHINE = 'funimation'
    # API token set by _login(); stays None when no login was performed.
    _TOKEN = None

    _TESTS = [{
        'url': 'https://www.funimation.com/shows/hacksign/role-play/',
        'info_dict': {
            'id': '91144',
            'display_id': 'role-play',
            'ext': 'mp4',
            'title': '.hack//SIGN - Role Play',
            'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
            'thumbnail': r're:https?://.*\.jpg',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
        'info_dict': {
            'id': '210051',
            'display_id': 'broadcast-dub-preview',
            'ext': 'mp4',
            'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with the configured credentials and store the API token.

        Does nothing when ``_get_login_info()`` returns no username.  On
        success the ``token`` field of the login response is stored in
        ``self._TOKEN``.

        Raises:
            ExtractorError: with ``expected=True`` and the server-provided
                ``error`` message when the login endpoint answers HTTP 401;
                any other ``ExtractorError`` is re-raised unchanged.
        """
        username, password = self._get_login_info()
        if username is None:
            return
        try:
            data = self._download_json(
                'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
                None, 'Logging in', data=urlencode_postdata({
                    'username': username,
                    'password': password,
                }))
            self._TOKEN = data['token']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # The error body is raw bytes; decode explicitly as UTF-8 so
                # the result does not depend on the interpreter's default
                # codec.
                error = self._parse_json(
                    e.cause.read().decode('utf-8'), None)['error']
                raise ExtractorError(error, expected=True)
            raise

    def _real_initialize(self):
        """Perform the (optional) login before any extraction."""
        self._login()

    def _real_extract(self, url):
        """Extract metadata and formats for the episode page at *url*.

        Returns:
            An info dict with ``id``, ``display_id``, ``title``,
            ``description``, ``thumbnail``, series/season/episode fields
            and the list of ``formats``.

        Raises:
            ExtractorError: with ``expected=True`` and the API's error
                ``detail`` (or ``title``) when the ``showexperience``
                request is answered with HTTP 403; any other
                ``ExtractorError`` is re-raised unchanged.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        def _search_kane(name):
            # Value of ``KANE_customdimensions.<name> = '...';`` in the page,
            # or None when it is not present.
            return self._search_regex(
                r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
                webpage, name, default=None)

        # Default to an empty JS object so that a page without TITLE_DATA
        # still hands the JSON parser valid input (an empty string is not
        # valid JSON); the result is an empty dict either way.
        title_data = self._parse_json(self._search_regex(
            r'TITLE_DATA\s*=\s*({[^}]+})',
            webpage, 'title data', default='{}'),
            display_id, js_to_json, fatal=False) or {}

        video_id = title_data.get('id') or self._search_regex([
            r"KANE_customdimensions\.videoID\s*=\s*'(\d+)';",
            r'<iframe[^>]+src="/player/(\d+)',
        ], webpage, 'video_id', default=None)
        if not video_id:
            # Last resort: take the id from the player URL advertised in
            # the page's meta tags.
            player_url = self._html_search_meta([
                'al:web:url',
                'og:video:url',
                'og:video:secure_url',
            ], webpage, fatal=True)
            video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')

        title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
        series = _search_kane('showName')
        if series:
            # The full title is prefixed with the show name; ``episode``
            # keeps the bare episode title.
            title = '%s - %s' % (series, title)
        description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)

        try:
            headers = {}
            if self._TOKEN:
                headers['Authorization'] = 'Token %s' % self._TOKEN
            # pinst_id is a random 8-character alphanumeric string generated
            # per request.
            pinst_chars = string.digits + string.ascii_letters
            sources = self._download_json(
                'https://www.funimation.com/api/showexperience/%s/' % video_id,
                video_id, headers=headers, query={
                    'pinst_id': ''.join([random.choice(pinst_chars) for _ in range(8)]),
                })['items']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                # Decode the raw error body before parsing, matching the
                # handling of the 401 response in _login().
                error = self._parse_json(
                    e.cause.read().decode('utf-8'), video_id)['errors'][0]
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
            raise

        formats = []
        for source in sources:
            source_url = source.get('src')
            if not source_url:
                continue
            # Prefer the type declared by the API; otherwise derive it from
            # the URL's extension.
            source_type = source.get('videoType') or determine_ext(source_url)
            if source_type == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': source_type,
                    'url': source_url,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
            'series': series,
            'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
            'episode_number': int_or_none(title_data.get('episodeNum')),
            'episode': episode,
            'season_id': title_data.get('seriesId'),
            'formats': formats,
        }