Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/drtv.py
New upstream version 2017.02.07
[youtubedl] / youtube_dl / extractor / drtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 int_or_none,
8 float_or_none,
9 mimetype2ext,
10 parse_iso8601,
11 remove_end,
12 update_url_query,
13 )
14
15
class DRTVIE(InfoExtractor):
    """Extractor for on-demand dr.dk pages (tv/se, nyheder, radio/ondemand).

    The web page is scraped for a programcard identifier, which is then
    looked up through the ``mu/programcard/expanded`` JSON endpoint to
    collect formats, subtitles, thumbnail and duration.
    """

    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
    IE_NAME = 'drtv'
    _TESTS = [{
        'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
        'md5': '25e659cccc9a2ed956110a299fdf5983',
        'info_dict': {
            'id': 'klassen-darlig-taber-10',
            'ext': 'mp4',
            'title': 'Klassen - Dårlig taber (10)',
            'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
            'timestamp': 1471991907,
            'upload_date': '20160823',
            'duration': 606.84,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
        'md5': '2c37175c718155930f939ef59952474a',
        'info_dict': {
            'id': 'christiania-pusher-street-ryddes-drdkrjpo',
            'ext': 'mp4',
            'title': 'LIVE Christianias rydning af Pusher Street er i gang',
            'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
            'timestamp': 1472800279,
            'upload_date': '20160902',
            'duration': 131.4,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the programme at *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``timestamp``, ``duration``, ``formats`` and
        ``subtitles``.

        Raises ExtractorError (expected) when the page states that the
        programme is no longer available, and a geo-restriction error when
        no formats were found for an asset flagged ``RestrictedToDenmark``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if '>Programmet er ikke længere tilgængeligt' in webpage:
            raise ExtractorError(
                'Video %s is not available' % video_id, expected=True)

        # The slug from the URL is replaced by the identifier embedded in the
        # page markup; that identifier is what the programcard API is keyed on.
        video_id = self._search_regex(
            (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
             r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
            webpage, 'video id')

        programcard = self._download_json(
            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
            video_id, 'Downloading video JSON')
        data = programcard['Data'][0]

        # Prefer the OpenGraph metadata from the page and fall back to the
        # programcard JSON.
        title = remove_end(self._og_search_title(
            webpage, default=None), ' | TV | DR') or data['Title']
        description = self._og_search_description(
            webpage, default=None) or data.get('Description')

        timestamp = parse_iso8601(data.get('CreatedTime'))

        thumbnail = None
        duration = None

        restricted_to_denmark = False

        formats = []
        subtitles = {}

        # Maps the language names used in the programcard to language codes;
        # names that are not listed are used as the subtitle key unchanged.
        LANGS = {
            'Danish': 'da',
        }

        for asset in data['Assets']:
            kind = asset.get('Kind')
            if kind == 'Image':
                thumbnail = asset.get('Uri')
            elif kind in ('VideoResource', 'AudioResource'):
                # DurationInMilliseconds is scaled by 1000 (presumably to
                # seconds, matching the durations in _TESTS).
                duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
                restricted_to_denmark = asset.get('RestrictedToDenmark')
                spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
                for link in asset.get('Links', []):
                    uri = link.get('Uri')
                    if not uri:
                        continue
                    target = link.get('Target')
                    format_id = target or ''
                    preference = None
                    if spoken_subtitles:
                        # Spoken-subtitle variants get preference -1 and a
                        # distinguishing format id suffix.
                        preference = -1
                        format_id += '-spoken-subtitles'
                    if target == 'HDS':
                        f4m_formats = self._extract_f4m_formats(
                            uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
                            video_id, preference, f4m_id=format_id)
                        if kind == 'AudioResource':
                            # Mark audio-only resources as having no video.
                            for f in f4m_formats:
                                f['vcodec'] = 'none'
                        formats.extend(f4m_formats)
                    elif target == 'HLS':
                        formats.extend(self._extract_m3u8_formats(
                            uri, video_id, 'mp4', entry_protocol='m3u8_native',
                            preference=preference, m3u8_id=format_id))
                    else:
                        # Any other target is treated as a direct link.
                        bitrate = link.get('Bitrate')
                        if bitrate:
                            format_id += '-%s' % bitrate
                        formats.append({
                            'url': uri,
                            'format_id': format_id,
                            'tbr': int_or_none(bitrate),
                            'ext': link.get('FileFormat'),
                            'vcodec': 'none' if kind == 'AudioResource' else None,
                        })
                subtitles_list = asset.get('SubtitlesList')
                if isinstance(subtitles_list, list):
                    for subs in subtitles_list:
                        if not subs.get('Uri'):
                            continue
                        lang = subs.get('Language') or 'da'
                        subtitles.setdefault(LANGS.get(lang, lang), []).append({
                            'url': subs['Uri'],
                            'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
                        })

        if not formats and restricted_to_denmark:
            # NOTE(review): no `expected` keyword is passed here.
            # InfoExtractor.raise_geo_restricted takes the message (and, in
            # newer versions, `countries`) and already raises an expected
            # error; an `expected=True` argument would fail with TypeError
            # and hide the geo restriction message. Confirm against the
            # bundled common.py.
            self.raise_geo_restricted(
                'Unfortunately, DR is not allowed to show this program outside Denmark.')

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
154
155
class DRTVLiveIE(InfoExtractor):
    """Extractor for dr.dk live channel pages (``/tv/live/<channel>``)."""

    IE_NAME = 'drtv:live'
    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
    _TEST = {
        'url': 'https://www.dr.dk/tv/live/dr1',
        'info_dict': {
            'id': 'dr1',
            'ext': 'mp4',
            'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the live-stream info dict for the channel named in *url*.

        Channel metadata is fetched from the ``mu-online`` channel API; each
        stream of each streaming server with a ``Server`` value is expanded
        into HLS or HDS formats according to the server's ``LinkType``.
        """
        channel_id = self._match_id(url)
        channel_data = self._download_json(
            'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
            channel_id)
        title = self._live_title(channel_data['Title'])

        formats = []
        for server_entry in channel_data.get('StreamingServers', []):
            server = server_entry.get('Server')
            if not server:
                # Without a server there is nothing to build a URL from.
                continue
            link_type = server_entry.get('LinkType')

            # Walk every stream path of every quality level, in order.
            stream_paths = (
                stream.get('Stream')
                for quality in server_entry.get('Qualities', [])
                for stream in quality.get('Streams', []))

            for stream_path in stream_paths:
                if not stream_path:
                    continue
                base_url = '%s/%s' % (server, stream_path)
                stream_url = update_url_query(base_url, {'b': ''})
                if link_type == 'HLS':
                    hls_formats = self._extract_m3u8_formats(
                        stream_url, channel_id, 'mp4',
                        m3u8_id=link_type, fatal=False, live=True)
                    formats.extend(hls_formats)
                elif link_type == 'HDS':
                    # HDS uses its own query string on the bare stream URL.
                    hds_url = update_url_query(base_url, {'hdcore': '3.7.0'})
                    hds_formats = self._extract_f4m_formats(
                        hds_url, channel_id, f4m_id=link_type, fatal=False)
                    formats.extend(hds_formats)
        self._sort_formats(formats)

        info = {
            'id': channel_id,
            'title': title,
            'thumbnail': channel_data.get('PrimaryImageUri'),
            'formats': formats,
            'is_live': True,
        }
        return info