- video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
-
- items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
- webpage, u'items', flags=re.MULTILINE)
- items = json.loads(items_json)
- info = items['mediaItems']['query']['results']['mediaObj'][0]
- # The 'meta' field is not always in the video webpage, we request it
- # from another page
- long_id = info['id']
- query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
- ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
- data = compat_urllib_parse.urlencode({
- 'q': query,
- 'env': 'prod',
- 'format': 'json',
- })
- query_result_json = self._download_webpage(
- 'http://video.query.yahoo.com/v1/public/yql?' + data,
- video_id, u'Downloading video info')
- query_result = json.loads(query_result_json)
- info = query_result['query']['results']['mediaObj'][0]
- meta = info['meta']
+ page_id = mobj.group('id')
+ display_id = mobj.group('display_id') or page_id
+ host = mobj.group('host')
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+ if 'err=404' in urlh.geturl():
+ raise ExtractorError('Video gone', expected=True)
+
+ # Look for iframed media first
+ entries = []
+ iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
+ for idx, iframe_url in enumerate(iframe_urls):
+ entries.append(self.url_result(host + iframe_url, 'Yahoo'))
+ if entries:
+ return self.playlist_result(entries, page_id)
+
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
+
+ # Look for Brightcove Legacy Studio embeds
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+
+ def brightcove_url_result(bc_url):  # local helper: hand bc_url over to BrightcoveNewIE as a url_result
+ return self.url_result(
+ smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),  # attach the 'country' group of the URL match via smuggle_url
+ BrightcoveNewIE.ie_key())
+
+ # Look for Brightcove New Studio embeds
+ bc_url = BrightcoveNewIE._extract_url(self, webpage)
+ if bc_url:
+ return brightcove_url_result(bc_url)
+
+ brightcove_iframe = self._search_regex(
+ r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
+ 'brightcove iframe', default=None)
+ if brightcove_iframe:
+ attr = extract_attributes(brightcove_iframe)
+ src = attr.get('src')
+ if src:
+ parsed_src = compat_urlparse.urlparse(src)
+ qs = compat_urlparse.parse_qs(parsed_src.query)
+ account_id = qs.get('accountId', ['2376984109001'])[0]
+ brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
+ if account_id and brightcove_id:
+ return brightcove_url_result(
+ 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+ % (account_id, brightcove_id))
+
+ # The query result is often embedded in the webpage as JSON. Explicit requests
+ # to the video API sometimes fail with a geo restriction reason, so prefer the
+ # embedded query result when it is present.
+ config_json = self._search_regex(
+ r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
+ webpage, 'videoplayer applet', default=None)
+ if config_json:
+ config = self._parse_json(config_json, display_id, fatal=False)
+ if config:
+ sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
+ if sapi and 'query' in sapi:
+ info = self._extract_info(display_id, sapi, webpage)
+ self._sort_formats(info['formats'])
+ return info
+
+ items_json = self._search_regex(
+ r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
+ default=None)
+ if items_json is None:
+ alias = self._search_regex(
+ r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
+ if alias is not None:
+ alias_info = self._download_json(
+ 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
+ display_id, 'Downloading alias info')
+ video_id = alias_info[0]['id']
+ else:
+ CONTENT_ID_REGEXES = [
+ r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+ r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+ r'"first_videoid"\s*:\s*"([^"]+)"',
+ r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
+ r'<article[^>]data-uuid=["\']([^"\']+)',
+ r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
+ r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
+ ]
+ video_id = self._search_regex(
+ CONTENT_ID_REGEXES, webpage, 'content ID')
+ else:
+ items = json.loads(items_json)
+ info = items['mediaItems']['query']['results']['mediaObj'][0]
+ # The 'meta' field is not always in the video webpage, so we request it
+ # from another page
+ video_id = info['id']
+ return self._get_info(video_id, display_id, webpage)
+
+ def _extract_info(self, display_id, query, webpage):
+ info = query['query']['results']['mediaObj'][0]
+ meta = info.get('meta')
+ video_id = info.get('id')
+
+ if not meta:
+ msg = info['status'].get('msg')
+ if msg:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, msg), expected=True)
+ raise ExtractorError('Unable to extract media object meta')