+# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ determine_ext,
+ int_or_none,
unified_strdate,
US_RATINGS,
)
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
'duration': 3190,
},
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
},
{
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
'duration': 5050,
},
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ }
},
{
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
'duration': 6559,
'thumbnail': 're:^https?://.*\.jpg$',
- }
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
},
{
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
'duration': 3172,
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140122',
- }
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
},
{
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
'id': 'united-states-of-secrets',
},
'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
+ 'info_dict': {
+ 'id': '2280706814',
+ 'display_id': 'player',
+ 'ext': 'mp4',
+ 'title': 'Death and the Civil War',
+ 'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
+ 'duration': 6705,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
}
]
return media_id, presumptive_id, upload_date
url = self._search_regex(
- r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+ r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
for vid_id in video_id]
return self.playlist_result(entries, display_id)
- info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
- info = self._download_json(info_url, display_id)
-
- redirect_url = info['alternate_encoding']['url']
- redirect_info = self._download_json(
- redirect_url + '?format=json', display_id,
- 'Downloading video url info')
- if redirect_info['status'] == 'error':
- if redirect_info['http_code'] == 403:
- message = (
- 'The video is not available in your region due to '
- 'right restrictions')
+ info = self._download_json(
+ 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+ display_id)
+
+ formats = []
+ for encoding_name in ('recommended_encoding', 'alternate_encoding'):
+ redirect = info.get(encoding_name)
+ if not redirect:
+ continue
+ redirect_url = redirect.get('url')
+ if not redirect_url:
+ continue
+
+ redirect_info = self._download_json(
+ redirect_url + '?format=json', display_id,
+ 'Downloading %s video url info' % encoding_name)
+
+ if redirect_info['status'] == 'error':
+ if redirect_info['http_code'] == 403:
+ message = (
+ 'The video is not available in your region due to '
+ 'right restrictions')
+ else:
+ message = redirect_info['message']
+ raise ExtractorError(message, expected=True)
+
+ format_url = redirect_info.get('url')
+ if not format_url:
+ continue
+
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
else:
- message = redirect_info['message']
- raise ExtractorError(message, expected=True)
+ formats.append({
+ 'url': format_url,
+ 'format_id': redirect.get('eeid'),
+ })
+ self._sort_formats(formats)
rating_str = info.get('rating')
if rating_str is not None:
rating_str = rating_str.rpartition('-')[2]
age_limit = US_RATINGS.get(rating_str)
+ subtitles = {}
+ closed_captions_url = info.get('closed_captions_url')
+ if closed_captions_url:
+ subtitles['en'] = [{
+ 'ext': 'ttml',
+ 'url': closed_captions_url,
+ }]
+
return {
'id': video_id,
'display_id': display_id,
'title': info['title'],
- 'url': redirect_info['url'],
- 'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
- 'duration': info.get('duration'),
+ 'duration': int_or_none(info.get('duration')),
'age_limit': age_limit,
'upload_date': upload_date,
+ 'formats': formats,
+ 'subtitles': subtitles,
}