- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- if video_id is not None:
- all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
- info = all_info.find('video')
-
- return {
- 'id': video_id,
- 'title': info.find('headline').text,
- 'ext': 'flv',
- 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
- 'description': info.find('caption').text,
- 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
- }
- else:
- # "feature" and "nightly-news" pages use theplatform.com
- video_id = mobj.group('mpx_id')
- webpage = self._download_webpage(url, video_id)
-
- filter_param = 'byId'
- bootstrap_json = self._search_regex(
- [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
- r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
- webpage, 'bootstrap json', default=None)
- if bootstrap_json:
- bootstrap = self._parse_json(
- bootstrap_json, video_id, transform_source=unescapeHTML)
-
- info = None
- if 'results' in bootstrap:
- info = bootstrap['results'][0]['video']
- elif 'video' in bootstrap:
- info = bootstrap['video']
- elif 'msnbcVideoInfo' in bootstrap:
- info = bootstrap['msnbcVideoInfo']['meta']
- elif 'msnbcThePlatform' in bootstrap:
- info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
- else:
- info = bootstrap
-
- if 'guid' in info:
- video_id = info['guid']
- filter_param = 'byGuid'
- elif 'mpxId' in info:
- video_id = info['mpxId']
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
- 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
- 'ie_key': 'ThePlatformFeed',
- }
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*({.+});', webpage,
+ 'bootstrap json'), video_id, js_to_json)
+ video_data = try_get(data, lambda x: x['video']['current'], dict)
+ if not video_data:
+ video_data = data['article']['content'][0]['primaryMedia']['video']
+ title = video_data['headline']['primary']
+
+ formats = []
+ for va in video_data.get('videoAssets', []):
+ public_url = va.get('publicUrl')
+ if not public_url:
+ continue
+ if '://link.theplatform.com/' in public_url:
+ public_url = update_url_query(public_url, {'format': 'redirect'})
+ format_id = va.get('format')
+ if format_id == 'M3U':
+ formats.extend(self._extract_m3u8_formats(
+ public_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ continue
+ tbr = int_or_none(va.get('bitrate'), 1000)
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': public_url,
+ 'width': int_or_none(va.get('width')),
+ 'height': int_or_none(va.get('height')),
+ 'tbr': tbr,
+ 'ext': 'mp4',
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ closed_captioning = video_data.get('closedCaptioning')
+ if closed_captioning:
+ for cc_url in closed_captioning.values():
+ if not cc_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': try_get(video_data, lambda x: x['description']['primary']),
+ 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'timestamp': unified_timestamp(video_data.get('datePublished')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }