import re
from .common import InfoExtractor
+from ..utils import (
+ US_RATINGS,
+)
class PBSIE(InfoExtractor):
# Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player
- video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+ video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
},
}
- def _real_extract(self, url):
# Work out the identifiers for a PBS URL and return them as a
# (video_id, display_id) pair.
# NOTE(review): this excerpt is in diff form ("+"/"-" prefixes, indentation
# stripped) and appears to omit unchanged lines, so the control flow shown
# here may be partial -- confirm against the full file before editing.
+ def _extract_ids(self, url):
mobj = re.match(self._VALID_URL, url)
# 'presumptive_id' is the named group from the "article with embedded
# player" alternative of the URL pattern; it doubles as the display id.
presumptive_id = mobj.group('presumptive_id')
display_id = presumptive_id
if presumptive_id:
# Article page: fetch the HTML so the embedded player can be located.
webpage = self._download_webpage(url, display_id)
+
+ # frontline video embed
# Looks for a "div: 'videoembed', mediaid: '<digits>'" snippet in the page.
# presumably fatal=False/default=None make a miss yield None instead of
# raising -- verify against InfoExtractor._search_regex.
+ media_id = self._search_regex(
+ r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
+ webpage, 'frontline video ID', fatal=False, default=None)
+ if media_id:
# The numeric media id is returned as the video id, with the URL slug as
# the display id.
+ return media_id, presumptive_id
+
# Otherwise take the src of the <iframe id="partnerPlayer"> and continue
# with that player URL (rebinds the `url` parameter).
url = self._search_regex(
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
webpage, 'player URL')
# NOTE(review): the 'id' group is not defined in the visible part of
# _VALID_URL and `mobj` is not re-matched against the new `url` here;
# lines are probably elided from this excerpt -- confirm.
video_id = mobj.group('id')
display_id = video_id
+ return video_id, display_id
+
# Build the info dict for a PBS video: resolve its ids, download the
# videoInfo JSON and map its fields (plus a derived age limit) to the result.
# NOTE(review): this excerpt is in diff form and may omit unchanged lines
# (e.g. further keys of the returned dict) -- confirm against the full file.
+ def _real_extract(self, url):
+ video_id, display_id = self._extract_ids(url)
+
# The metadata is fetched as JSON from the videoInfo endpoint for this id.
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
# Keep only the text after the last '-' of the rating; when there is no
# '-', rpartition leaves the whole string in element [2].
+ rating_str = info.get('rating')
+ if rating_str is not None:
+ rating_str = rating_str.rpartition('-')[2]
# presumably US_RATINGS is a dict of rating label -> age, so a missing or
# unknown rating gives age_limit None -- verify in ..utils.
+ age_limit = US_RATINGS.get(rating_str)
+
return {
'id': video_id,
# 'title' and 'program' are indexed directly, so they must be present in
# the JSON; the remaining fields use .get() and may come back as None.
'title': info['title'],
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
+ 'age_limit': age_limit,
}