X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/d9d7cd0e85dc712461d9185db9df9d6c900a573b..233624c1db781ee7dabbaf88453cf18e248dd20d:/youtube_dl/extractor/pbs.py diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 16cc667..80340f5 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, determine_ext, @@ -14,6 +15,7 @@ from ..utils import ( strip_jsonp, strip_or_none, unified_strdate, + url_or_none, US_RATINGS, ) @@ -187,9 +189,9 @@ class PBSIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?: # Direct video URL - (?:%s)/(?:viralplayer|video)/(?P[0-9]+)/? | + (?:%s)/(?:(?:vir|port)alplayer|video)/(?P[0-9]+)(?:[?/]|$) | # Article with embedded player (or direct video) - (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | + (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P[^/]+)/ ) @@ -345,6 +347,65 @@ class PBSIE(InfoExtractor): 'formats': 'mincount:8', }, }, + { + # https://github.com/rg3/youtube-dl/issues/13801 + 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/', + 'info_dict': { + 'id': '3003333873', + 'ext': 'mp4', + 'title': 'PBS NewsHour - full episode July 31, 2017', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'duration': 3265, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/', + 'info_dict': { + 'id': '2365936247', + 'ext': 'mp4', + 'title': 'Antiques Roadshow - Indianapolis, Hour 2', + 'description': 'md5:524b32249db55663e7231b6b8d1671a2', + 'duration': 3180, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, + { + 'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/', + 'info_dict': { + 'id': '3007193718', + 'ext': 'mp4', + 'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster", + 'description': 'md5:37efbac85e0c09b009586523ec143652', + 'duration': 6292, + 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, + { + 'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/', + 'info_dict': { + 'id': '3011407934', + 'ext': 'mp4', + 'title': 'Stories from the Stage - Road Trip', + 'duration': 1619, + 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, @@ -352,6 +413,10 @@ class PBSIE(InfoExtractor): { 'url': 'http://watch.knpb.org/video/2365616055/', 'only_matching': True, + }, + { + 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=', + 'only_matching': True, } ] _ERRORS = { @@ -402,6 +467,9 @@ class PBSIE(InfoExtractor): r'class="coveplayerid">([^<]+)<', # coveplayer r']+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/ r'', # jwplayer + r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", + r']+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/ + r']+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/ ] media_id = self._search_regex( @@ -433,7 +501,11 @@ class PBSIE(InfoExtractor): if url: break - mobj = re.match(self._VALID_URL, url) + if not url: + url = self._og_search_url(webpage) + + mobj = re.match( + self._VALID_URL, self._proto_relative_url(url.strip())) player_id = mobj.group('player_id') if not display_id: @@ -443,13 +515,27 @@ class PBSIE(InfoExtractor): url, display_id, note='Downloading player page', errnote='Could not download player page') video_id = self._search_regex( - r'