- mobj = re.match(self._VALID_URL, url)
- page_id = mobj.group('id')
- display_id = mobj.group('display_id') or page_id
- host = mobj.group('host')
- webpage, urlh = self._download_webpage_handle(url, display_id)
- if 'err=404' in urlh.geturl():
- raise ExtractorError('Video gone', expected=True)
-
- # Look for iframed media first
- entries = []
- iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
- for idx, iframe_url in enumerate(iframe_urls):
- entries.append(self.url_result(host + iframe_url, 'Yahoo'))
- if entries:
- return self.playlist_result(entries, page_id)
-
- # Look for NBCSports iframes
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
-
- # Look for Brightcove Legacy Studio embeds
- bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
- if bc_url:
- return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
-
- def brightcove_url_result(bc_url):
- return self.url_result(
- smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
- BrightcoveNewIE.ie_key())
-
- # Look for Brightcove New Studio embeds
- bc_url = BrightcoveNewIE._extract_url(self, webpage)
- if bc_url:
- return brightcove_url_result(bc_url)
-
- brightcove_iframe = self._search_regex(
- r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
- 'brightcove iframe', default=None)
- if brightcove_iframe:
- attr = extract_attributes(brightcove_iframe)
- src = attr.get('src')
- if src:
- parsed_src = compat_urlparse.urlparse(src)
- qs = compat_urlparse.parse_qs(parsed_src.query)
- account_id = qs.get('accountId', ['2376984109001'])[0]
- brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
- if account_id and brightcove_id:
- return brightcove_url_result(
- 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
- % (account_id, brightcove_id))
-
- # The query result is often embedded in the webpage as JSON. Explicit requests
- # to the video API sometimes fail with a geo-restriction reason, so prefer the
- # embedded query result when it is present.
- config_json = self._search_regex(
- r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
- webpage, 'videoplayer applet', default=None)
- if config_json:
- config = self._parse_json(config_json, display_id, fatal=False)
- if config:
- sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
- if sapi and 'query' in sapi:
- info = self._extract_info(display_id, sapi, webpage)
- self._sort_formats(info['formats'])
- return info
-
- items_json = self._search_regex(
- r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
- default=None)
- if items_json is None:
- alias = self._search_regex(
- r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
- if alias is not None:
- alias_info = self._download_json(
- 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
- display_id, 'Downloading alias info')
- video_id = alias_info[0]['id']
- else:
- CONTENT_ID_REGEXES = [
- r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
- r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
- r'"first_videoid"\s*:\s*"([^"]+)"',
- r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
- r'<article[^>]data-uuid=["\']([^"\']+)',
- r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
- r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
- ]
- video_id = self._search_regex(
- CONTENT_ID_REGEXES, webpage, 'content ID')