from ..utils import (
determine_ext,
ExtractorError,
- xpath_with_ns,
- unsmuggle_url,
- int_or_none,
- url_basename,
float_or_none,
+ int_or_none,
+ sanitized_Request,
+ unsmuggle_url,
+ xpath_with_ns,
+ mimetype2ext,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
subtitles[lang] = [{
- 'ext': 'srt' if mime == 'text/srt' else 'ttml',
+ 'ext': mimetype2ext(mime),
'url': src,
}]
class ThePlatformIE(ThePlatformBaseIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
- (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ (?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
'upload_date': '20150701',
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
},
+ }, {
+ # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
+ # geo-restricted (US), HLS encrypted with AES-128
+ 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
+ 'only_matching': True,
}]
@staticmethod
# There seems to be no pattern to the filename of the script we need,
# so try the scripts one by one
for script in reversed(scripts):
- feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
- feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
+ feed_script = self._download_webpage(
+ self._proto_relative_url(script, 'http:'),
+ video_id, 'Downloading feed script')
+ feed_id = self._search_regex(
+ r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
+ 'default feed id', default=None)
if feed_id is not None:
break
if feed_id is None:
if smuggled_data.get('force_smil_url', False):
smil_url = url
+ # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+ elif '/guid/' in url:
+ # Fetch the player page, sending the smuggled source URL (when one was
+ # provided) as the Referer header.
+ headers = {}
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ headers['Referer'] = source_url
+ request = sanitized_Request(url, headers=headers)
+ webpage = self._download_webpage(request, video_id)
+ # The SMIL URL is the href of the <link> element whose type is
+ # application/smil+xml.
+ smil_url = self._search_regex(
+ r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
+ webpage, 'smil url', group='url')
+ path = self._search_regex(
+ r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
+ # Add the separator and the parameters in two separate steps. Written as a
+ # single expression, the conditional binds looser than '+', so the
+ # parameters were only appended when smil_url already contained a '?'.
+ smil_url += '?' if '?' not in smil_url else '&'
+ smil_url += 'formats=m3u,mpeg4&format=SMIL'
elif mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
first_video_id = None
duration = None
for item in entry['media$content']:
- smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
- cur_video_id = url_basename(smil_url)
+ smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
+ cur_video_id = ThePlatformIE._match_id(smil_url)
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))