X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/a4f82408d9a3d921d9c2af7e4d757f44737dc7ea..8b4fae8ce16f284d2b7a5bb2ee099e9ecaf0c0d2:/youtube_dl/extractor/wdr.py diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 54d37da..a851578 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,14 +1,17 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urlparse, - determine_ext, +) +from ..utils import ( unified_strdate, + qualities, ) @@ -25,10 +28,12 @@ class WDRIE(InfoExtractor): 'title': 'Servicezeit', 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', 'upload_date': '20140310', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', @@ -38,10 +43,12 @@ class WDRIE(InfoExtractor): 'title': 'Marga Spiegel ist tot', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20140311', + 'is_live': False }, 'params': { 'skip_download': True, }, + 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', @@ -52,6 +59,7 @@ class WDRIE(InfoExtractor): 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20091129', + 'is_live': False }, }, { @@ -63,8 +71,31 @@ class WDRIE(InfoExtractor): 'title': 'Flavia Coelho: Amar é Amar', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'upload_date': '20140717', + 'is_live': False }, + 'skip': 'Page Not Found', }, + { + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', + 'playlist_mincount': 146, + 'info_dict': { + 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + } + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'info_dict': { + 'id': 'mdb-103364', + 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'ext': 'flv', + 'upload_date': '20150101', + 'is_live': True + }, + 'params': { + 'skip_download': True, + }, + } ] def _real_extract(self, url): @@ -77,42 +108,107 @@ class WDRIE(InfoExtractor): if mobj.group('player') is None: entries = [ self.url_result(page_url + href, 'WDR') - for href in re.findall(r'\s*]*>\s*\s*]+href="([^"]+)"', + webpage, 'm3u8 url', default=None) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, page_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + direct_urls = re.findall( + r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) + if direct_urls: + for quality, video_url in direct_urls: + formats.append({ + 'url': video_url, + 'preference': preference(quality), + 'http_headers': { + 'User-Agent': 'mobile', + }, + }) + + self._sort_formats(formats) description = self._html_search_meta('Description', webpage, 'description') return { 'id': page_id, - 'url': video_url, - 'ext': ext, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, + 'is_live': is_live } @@ -141,8 +237,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'ext': determine_ext(url), - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, } @@ -171,8 +268,7 @@ class WDRMausIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) param_code = self._html_search_regex( @@ -223,5 +319,3 @@ class WDRMausIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, } - -# TODO test _1 \ No newline at end of file