+
+ # Is it an RSS feed?
+ try:
+ doc = parse_xml(webpage)
+ if doc.tag == 'rss':
+ return self._extract_rss(url, video_id, doc)
+ except compat_xml_parse_error:
+ pass
+
+ # Sometimes embedded video player is hidden behind percent encoding
+ # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+ # Unescaping the whole page allows to handle those cases in a generic way
+ webpage = compat_urllib_parse.unquote(webpage)
+
+ # it's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical
+ video_title = self._html_search_regex(
+ r'(?s)<title>(.*?)</title>', webpage, 'video title',
+ default='video')
+
+ # video uploader is domain name
+ video_uploader = self._search_regex(
+ r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
+
+ # Look for BrightCove:
+ bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+ if bc_urls:
+ self.to_screen('Brightcove video detected.')
+ entries = [{
+ '_type': 'url',
+ 'url': smuggle_url(bc_url, {'Referer': url}),
+ 'ie_key': 'Brightcove'
+ } for bc_url in bc_urls]
+
+ return {
+ '_type': 'playlist',
+ 'title': video_title,
+ 'id': video_id,
+ 'entries': entries,
+ }
+
+ # Look for embedded (iframe) Vimeo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
+ if mobj:
+ player_url = unescapeHTML(mobj.group('url'))
+ surl = smuggle_url(player_url, {'Referer': url})
+ return self.url_result(surl, 'Vimeo')
+
+ # Look for embedded (swf embed) Vimeo player
+ mobj = re.search(
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+ if mobj:
+ return self.url_result(mobj.group(1), 'Vimeo')
+
+ # Look for embedded YouTube player
+ matches = re.findall(r'''(?x)
+ (?:<iframe[^>]+?src=|embedSWF\(\s*)
+ (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+ (?:embed|v)/.+?)
+ \1''', webpage)
+ if matches:
+ urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
+ for tuppl in matches]
+ return self.playlist_result(
+ urlrs, playlist_id=video_id, playlist_title=video_title)
+
+ # Look for embedded Dailymotion player
+ matches = re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
+ if matches:
+ urlrs = [self.url_result(unescapeHTML(tuppl[1]))
+ for tuppl in matches]
+ return self.playlist_result(
+ urlrs, playlist_id=video_id, playlist_title=video_title)
+
+ # Look for embedded Wistia player
+ match = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+ if match:
+ return {
+ '_type': 'url_transparent',
+ 'url': unescapeHTML(match.group('url')),
+ 'ie_key': 'Wistia',
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'id': video_id,
+ }
+
+ # Look for embedded blip.tv player
+ mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
+ if mobj:
+ return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
+ mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
+ if mobj:
+ return self.url_result(mobj.group(1), 'BlipTV')
+
+ # Look for embedded condenast player
+ matches = re.findall(
+ r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
+ webpage)
+ if matches:
+ return {
+ '_type': 'playlist',
+ 'entries': [{
+ '_type': 'url',
+ 'ie_key': 'CondeNast',
+ 'url': ma,
+ } for ma in matches],
+ 'title': video_title,
+ 'id': video_id,
+ }
+
+ # Look for Bandcamp pages with custom domain
+ mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+ if mobj is not None:
+ burl = unescapeHTML(mobj.group(1))
+ # Don't set the extractor because it can be a track url or an album
+ return self.url_result(burl)
+
+ # Look for embedded Vevo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Ooyala videos
+ mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ if mobj is not None:
+ return OoyalaIE._build_url_result(mobj.group('ec'))
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
+ # Look for MPORA videos
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Mpora')
+
+ # Look for embedded NovaMov-based player
+ mobj = re.search(
+ r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
+ (?P<url>http://(?:(?:embed|www)\.)?
+ (?:novamov\.com|
+ nowvideo\.(?:ch|sx|eu|at|ag|co)|
+ videoweed\.(?:es|com)|
+ movshare\.(?:net|sx|ag)|
+ divxstage\.(?:eu|net|ch|co|at|ag))
+ /embed\.php.+?)\1''', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Facebook player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Facebook')
+
+ # Look for embedded VK player
+ mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'VK')
+
+ # Look for embedded Huffington Post player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'HuffPost')
+
+ # Look for embed.ly
+ mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+ mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
+ if mobj is not None:
+ return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+
+ # Look for funnyordie embed
+ matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+ if matches:
+ urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
+ for eurl in matches]
+ return self.playlist_result(
+ urlrs, playlist_id=video_id, playlist_title=video_title)
+
+ # Look for embedded RUTV player
+ rutv_url = RUTVIE._extract_url(webpage)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
+ # Look for embedded Ustream videos
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ustream')
+
+ # Look for embedded arte.tv player
+ mobj = re.search(
+ r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
+ # Look for embedded smotri.com player
+ smotri_url = SmotriIE._extract_url(webpage)
+ if smotri_url:
+ return self.url_result(smotri_url, 'Smotri')
+
+ # Look for embeded soundcloud player
+ mobj = re.search(
+ r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
+ webpage)
+ if mobj is not None:
+ url = unescapeHTML(mobj.group('url'))
+ return self.url_result(url)
+