ExtractorError,
float_or_none,
HEADRequest,
+ is_html,
orderedSet,
parse_xml,
smuggle_url,
unescapeHTML,
unified_strdate,
unsmuggle_url,
+ UnsupportedError,
url_basename,
)
from .brightcove import BrightcoveIE
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
- 'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
+ 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
'info_dict': {
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
},
+ 'add_ie': ['Ooyala'],
+ },
+ # multiple ooyala embeds on SBN network websites
+ {
+ 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+ 'info_dict': {
+ 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+ 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
+ },
+ 'playlist_mincount': 3,
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
},
# google redirect
{
'ext': 'mp4',
'upload_date': '20130224',
'uploader_id': 'TheVerge',
- 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
+ 'description': 're:^Chris Ziegler takes a look at the\.*',
'uploader': 'The Verge',
'title': 'First Firefox OS phones side-by-side',
},
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
},
},
+ # BBC iPlayer embeds
+ {
+ 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
+ 'info_dict': {
+ 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
+ },
+ 'playlist_mincount': 18,
+ },
# RUTV embed
{
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
'info_dict': {
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
'title': 'Zero Punctuation',
- 'description': 're:'
+ 'description': 're:.*groundbreaking video review series.*'
},
'playlist_mincount': 11,
},
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
]
+ },
+ # Cinchcast embed
+ {
+ 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+ 'info_dict': {
+ 'id': '7141703',
+ 'ext': 'mp3',
+ 'upload_date': '20141126',
+ 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+ }
+ },
+ # Cinerama player
+ {
+ 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+ 'info_dict': {
+ 'id': '730m_DandD_1901_512k',
+ 'ext': 'mp4',
+ 'uploader': 'www.abc.net.au',
+ 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+ }
+ },
+ # embedded viddler video
+ {
+ 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'mp4',
+ 'uploader': 'deadspin',
+ 'title': 'WALL-TO-GORTAT',
+ 'timestamp': 1422285291,
+ 'upload_date': '20150126',
+ },
+ 'add_ie': ['Viddler'],
}
-
]
def report_following_redirect(self, new_url):
# Maybe it's a direct link to a video?
# Be careful not to download the whole thing!
first_bytes = full_response.read(512)
- if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+ if not is_html(first_bytes):
self._downloader.report_warning(
'URL could be a direct video link, returning it as such.')
upload_date = unified_strdate(
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
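# Helper: wrap each match (passed through getter when one is given) in a url_result, collect them with orderedSet and return a single playlist_result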
- def _playlist_from_matches(matches, getter, ie=None):
+ def _playlist_from_matches(matches, getter=None, ie=None):
urlrs = orderedSet(
- self.url_result(self._proto_relative_url(getter(m)), ie)
+ self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for embedded Viddler player
+ mobj = re.search(
+ r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for Ooyala videos
- mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
- re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+ re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec'))
+ # Look for multiple Ooyala embeds on SBN network websites
+ mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+ if mobj is not None:
+ embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
+ if embeds:
+ return _playlist_from_matches(
+ embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
if mobj is not None:
return _playlist_from_matches(
matches, getter=unescapeHTML, ie='FunnyOrDie')
+ # Look for BBC iPlayer embed
+ matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+ if matches:
+ return _playlist_from_matches(matches, ie='BBCCoUk')
+
# Look for embedded RUTV player
rutv_url = RUTVIE._extract_url(webpage)
if rutv_url:
# Look for embedded TED player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
+ # Look for embedded Cinchcast player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Cinchcast')
+
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
webpage)
found = filter_video(re.findall(r'''(?xs)
flowplayer\("[^"]+",\s*
\{[^}]+?\}\s*,
- \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+ \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
["']?url["']?\s*:\s*["']([^"']+)["']
''', webpage))
+ if not found:
+ # Cinerama player
+ found = re.findall(
+ r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
if not found:
# Try to find twitter cards info
found = filter_video(re.findall(
'url': new_url,
}
if not found:
- raise ExtractorError('Unsupported URL: %s' % url)
+ raise UnsupportedError(url)
entries = []
for video_url in found: