-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
class ThePlatformBaseIE(OnceIE):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
- meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
+ meta = self._download_xml(
+ smil_url, video_id, note=note, query={'format': 'SMIL'},
+ headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
'url': src,
})
+ duration = info.get('duration')
+ tp_chapters = info.get('chapters', [])
+ chapters = []
+ if tp_chapters:
+ def _add_chapter(start_time, end_time):
+ start_time = float_or_none(start_time, 1000)
+ end_time = float_or_none(end_time, 1000)
+ if start_time is None or end_time is None:
+ return
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ })
+
+ for chapter in tp_chapters[:-1]:
+ _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
+ _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
+
return {
'title': info['title'],
'subtitles': subtitles,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': int_or_none(info.get('duration'), 1000),
+ 'duration': float_or_none(duration, 1000),
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'),
+ 'chapters': chapters,
}
def _extract_theplatform_metadata(self, path, video_id):
class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
- (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
# rtmp download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}, {
# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
'title': 'iPhone Siri’s sassy response to a math question has people talking',
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
'duration': 83.0,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1435752600,
'upload_date': '20150701',
'uploader': 'NBCU-NEWS',
if m:
return [m.group('url')]
+ # Are whitesapces ignored in URLs?
+ # https://github.com/rg3/youtube-dl/issues/12044
matches = re.findall(
- r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
- return list(zip(*matches))[1]
+ return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
def hex_to_bytes(hex):
return binascii.a2b_hex(hex.encode('ascii'))
- relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+ relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
'ext': 'mp4',
'title': 'The Biden factor: will Joe run in 2016?',
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140208',
'timestamp': 1391824260,
'duration': 467.0,
},
}]
- def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
formats = []
subtitles = {}
if asset_type in asset_types_query:
query.update(asset_types_query[asset_type])
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
- smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+ main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)