X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/5ea61d104de9ca8a7d19d63d83173eb7391081d2..d2632ebbe0759622d4ab7aff134421194974b394:/youtube_dl/extractor/drtv.py diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index baa24c6..e966d74 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -4,26 +4,46 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + float_or_none, + mimetype2ext, parse_iso8601, + remove_end, + update_url_query, ) class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' - - _TEST = { - 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', - 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + IE_NAME = 'drtv' + _TESTS = [{ + 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { - 'id': 'panisk-paske-5', + 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', - 'title': 'Panisk Påske (5)', - 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', - 'timestamp': 1426984612, - 'upload_date': '20150322', - 'duration': 1455, + 'title': 'Klassen - Dårlig taber (10)', + 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', + 'timestamp': 1471991907, + 'upload_date': '20160823', + 'duration': 606.84, }, - } + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', + 'md5': '2c37175c718155930f939ef59952474a', + 'info_dict': { + 'id': 'christiania-pusher-street-ryddes-drdkrjpo', + 'ext': 'mp4', + 'title': 'LIVE Christianias rydning af Pusher Street er i gang', + 'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', + 'timestamp': 1472800279, + 'upload_date': '20160902', + 'duration': 131.4, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +55,8 @@ class DRTVIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) video_id = self._search_regex( - r'data-(?:material-identifier|episode-slug)="([^"]+)"', + (r'data-(?:material-identifier|episode-slug)="([^"]+)"', + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') programcard = self._download_json( @@ -43,9 +64,12 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = data['Title'] - description = data['Description'] - timestamp = parse_iso8601(data['CreatedTime']) + title = remove_end(self._og_search_title( + webpage, default=None), ' | TV | DR') or data['Title'] + description = self._og_search_description( + webpage, default=None) or data.get('Description') + + timestamp = parse_iso8601(data.get('CreatedTime')) thumbnail = None duration = None @@ -56,28 +80,35 @@ class DRTVIE(InfoExtractor): subtitles = {} for asset in data['Assets']: - if asset['Kind'] == 'Image': - thumbnail = asset['Uri'] - elif asset['Kind'] == 'VideoResource': - duration = asset['DurationInMilliseconds'] / 1000.0 - restricted_to_denmark = asset['RestrictedToDenmark'] - spoken_subtitles = asset['Target'] == 'SpokenSubtitles' - for link in asset['Links']: - uri = link['Uri'] - target = link['Target'] - format_id = target + kind = asset.get('Kind') + if kind == 'Image': + thumbnail = asset.get('Uri') + elif kind in ('VideoResource', 'AudioResource'): + duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) + restricted_to_denmark = asset.get('RestrictedToDenmark') + spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' + for link in asset.get('Links', []): + uri = link.get('Uri') + if not uri: + continue + target = link.get('Target') + format_id = target or '' preference = None if spoken_subtitles: preference = -1 format_id += '-spoken-subtitles' if target == 'HDS': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', - video_id, preference, f4m_id=format_id)) + video_id, preference, f4m_id=format_id) + if kind == 'AudioResource': + for f in f4m_formats: + f['vcodec'] = 'none' + formats.extend(f4m_formats) elif target == 'HLS': formats.extend(self._extract_m3u8_formats( - uri, video_id, 'mp4', preference=preference, - m3u8_id=format_id)) + uri, video_id, 'mp4', entry_protocol='m3u8_native', + preference=preference, m3u8_id=format_id)) else: bitrate = link.get('Bitrate') if bitrate: @@ -85,21 +116,28 @@ class DRTVIE(InfoExtractor): formats.append({ 'url': uri, 'format_id': format_id, - 'tbr': bitrate, + 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), + 'vcodec': 'none' if kind == 'AudioResource' else None, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): LANGS = { - 'Danish': 'dk', + 'Danish': 'da', } for subs in subtitles_list: - lang = subs['Language'] - subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] + if not subs.get('Uri'): + continue + lang = subs.get('Language') or 'da' + subtitles.setdefault(LANGS.get(lang, lang), []).append({ + 'url': subs['Uri'], + 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' + }) if not formats and restricted_to_denmark: - raise ExtractorError( - 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + self.raise_geo_restricted( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', + expected=True) self._sort_formats(formats) @@ -113,3 +151,58 @@ class DRTVIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DRTVLiveIE(InfoExtractor): + IE_NAME = 'drtv:live' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P[\da-z-]+)' + _TEST = { + 'url': 'https://www.dr.dk/tv/live/dr1', + 'info_dict': { + 'id': 'dr1', + 'ext': 'mp4', + 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + channel_data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, + channel_id) + title = self._live_title(channel_data['Title']) + + formats = [] + for streaming_server in channel_data.get('StreamingServers', []): + server = streaming_server.get('Server') + if not server: + continue + link_type = streaming_server.get('LinkType') + for quality in streaming_server.get('Qualities', []): + for stream in quality.get('Streams', []): + stream_path = stream.get('Stream') + if not stream_path: + continue + stream_url = update_url_query( + '%s/%s' % (server, stream_path), {'b': ''}) + if link_type == 'HLS': + formats.extend(self._extract_m3u8_formats( + stream_url, channel_id, 'mp4', + m3u8_id=link_type, fatal=False, live=True)) + elif link_type == 'HDS': + formats.extend(self._extract_f4m_formats(update_url_query( + '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), + channel_id, f4m_id=link_type, fatal=False)) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'title': title, + 'thumbnail': channel_data.get('PrimaryImageUri'), + 'formats': formats, + 'is_live': True, + }