- video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
-
- items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
- webpage, u'items', flags=re.MULTILINE)
- items = json.loads(items_json)
- info = items['mediaItems']['query']['results']['mediaObj'][0]
- # The 'meta' field is not always in the video webpage, we request it
- # from another page
- long_id = info['id']
- query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
- ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
- data = compat_urllib_parse.urlencode({
- 'q': query,
- 'env': 'prod',
- 'format': 'json',
- })
- query_result_json = self._download_webpage(
- 'http://video.query.yahoo.com/v1/public/yql?' + data,
- video_id, u'Downloading video info')
- query_result = json.loads(query_result_json)
- info = query_result['query']['results']['mediaObj'][0]
- meta = info['meta']
+ display_id = mobj.group('display_id') or self._match_id(url)
+ page_id = mobj.group('id')
+ url = mobj.group('url')
+ host = mobj.group('host')
+ webpage = self._download_webpage(url, display_id)
+
+ # Look for iframed media first
+ iframe_m = re.search(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
+ if iframe_m:
+ iframepage = self._download_webpage(
+ host + iframe_m.group(1), display_id, 'Downloading iframe webpage')
+ items_json = self._search_regex(
+ r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
+ if items_json:
+ items = json.loads(items_json)
+ video_id = items[0]['id']
+ return self._get_info(video_id, display_id, webpage)
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # The query result is often embedded in the webpage as JSON. Explicit requests
+ # to the video API sometimes fail with a geo restriction error, so we prefer
+ # the embedded query result when it is present.
+ config_json = self._search_regex(
+ r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
+ webpage, 'videoplayer applet', default=None)
+ if config_json:
+ config = self._parse_json(config_json, display_id, fatal=False)
+ if config:
+ sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
+ if sapi:
+ return self._extract_info(display_id, sapi, webpage)
+
+ items_json = self._search_regex(
+ r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
+ default=None)
+ if items_json is None:
+ CONTENT_ID_REGEXES = [
+ r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+ r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+ r'"first_videoid"\s*:\s*"([^"]+)"',
+ r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
+ ]
+ video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
+ else:
+ items = json.loads(items_json)
+ info = items['mediaItems']['query']['results']['mediaObj'][0]
+ # The 'meta' field is not always present in the video webpage, so we
+ # request it from another page.
+ video_id = info['id']
+ return self._get_info(video_id, display_id, webpage)
+
+ def _extract_info(self, display_id, query, webpage):
+ info = query['query']['results']['mediaObj'][0]
+ meta = info.get('meta')
+ video_id = info.get('id')
+
+ if not meta:
+ msg = info['status'].get('msg')
+ if msg:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, msg), expected=True)
+ raise ExtractorError('Unable to extract media object meta')