X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/d4ff594119bc679aa175947eb59a97bee8f966f4..3e696c728febc53e74c912c40ccd3b9504d536df:/youtube_dl/extractor/srmediathek.py

diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
index 5d583c7..b03272f 100644
--- a/youtube_dl/extractor/srmediathek.py
+++ b/youtube_dl/extractor/srmediathek.py
@@ -1,17 +1,19 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
-import json
+from .ard import ARDMediathekIE
+from ..utils import (
+    ExtractorError,
+    get_element_by_attribute,
+)
 
-from .common import InfoExtractor
-from ..utils import js_to_json
-
 
-class SRMediathekIE(InfoExtractor):
+class SRMediathekIE(ARDMediathekIE):
+    IE_NAME = 'sr:mediathek'
     IE_DESC = 'Saarländischer Rundfunk'
-    _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
         'info_dict': {
             'id': '28455',
@@ -20,24 +22,38 @@ class SRMediathekIE(InfoExtractor):
             'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
             'thumbnail': 're:^https?://.*\.jpg$',
         },
-    }
+        'skip': 'no longer available',
+    }, {
+        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
+        'info_dict': {
+            'id': '37682',
+            'ext': 'mp4',
+            'title': 'Love, Cakes and Rock\'n\'Roll',
+            'description': 'md5:18bf9763631c7d326c22603681e1123d',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        murls = json.loads(js_to_json(self._search_regex(
-            r'var mediaURLs\s*=\s*(.*?);\n', webpage, 'video URLs')))
-        formats = [{'url': murl} for murl in murls]
-        self._sort_formats(formats)
-
-        title = json.loads(js_to_json(self._search_regex(
-            r'var mediaTitles\s*=\s*(.*?);\n', webpage, 'title')))[0]
+        if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage:
+            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
 
-        return {
+        media_collection_url = self._search_regex(
+            r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
+        info = self._extract_media_info(media_collection_url, webpage, video_id)
+        info.update({
             'id': video_id,
-            'title': title,
-            'formats': formats,
+            'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
-        }
+        })
+        return info