X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/15b1d10671b48df598afd70e17ba21e9e64ac766..04cba85eab6b6d4e03d869473bd2204a09046333:/youtube_dl/extractor/generic.py diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 328301d..fbbc79a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -17,12 +17,14 @@ from ..utils import ( ExtractorError, float_or_none, HEADRequest, + is_html, orderedSet, parse_xml, smuggle_url, unescapeHTML, unified_strdate, unsmuggle_url, + UnsupportedError, url_basename, ) from .brightcove import BrightcoveIE @@ -130,12 +132,26 @@ class GenericIE(InfoExtractor): # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', - 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'md5': '166dd577b433b4d4ebfee10b0824d8ff', 'info_dict': { 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get }, + 'add_ie': ['Ooyala'], + }, + # multiple ooyala embeds on SBN network websites + { + 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'info_dict': { + 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', + }, + 'playlist_mincount': 3, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], }, # google redirect { @@ -145,7 +161,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', + 'description': 're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, @@ -180,6 +196,14 @@ class GenericIE(InfoExtractor): 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, }, + # BBC iPlayer embeds + { + 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER', + 'info_dict': { + 'title': 'BBC - Blogs - Adam Curtis - BUGGER', + }, + 'playlist_mincount': 18, + }, # RUTV embed { 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', @@ -351,7 +375,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', 'title': 'Zero Punctuation', - 'description': 're:' + 'description': 're:.*groundbreaking video review series.*' }, 'playlist_mincount': 11, }, @@ -467,8 +491,40 @@ class GenericIE(InfoExtractor): 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' ] + }, + # Cinchcast embed + { + 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', + 'info_dict': { + 'id': '7141703', + 'ext': 'mp3', + 'upload_date': '20141126', + 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', + } + }, + # Cinerama player + { + 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm', + 'info_dict': { + 'id': '730m_DandD_1901_512k', + 'ext': 'mp4', + 'uploader': 'www.abc.net.au', + 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', + } + }, + # embedded viddler video + { + 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597', + 'info_dict': { + 'id': '4d03aad9', + 'ext': 'mp4', + 'uploader': 'deadspin', + 'title': 'WALL-TO-GORTAT', + 'timestamp': 1422285291, + 'upload_date': '20150126', + }, + 'add_ie': ['Viddler'], } - ] def report_following_redirect(self, new_url): @@ -628,7 +684,7 @@ class GenericIE(InfoExtractor): # Maybe it's a direct link to a video? # Be careful not to download the whole thing! first_bytes = full_response.read(512) - if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')): + if not is_html(first_bytes): self._downloader.report_warning( 'URL could be a direct video link, returning it as such.') upload_date = unified_strdate( @@ -689,9 +745,9 @@ class GenericIE(InfoExtractor): r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') # Helper method - def _playlist_from_matches(matches, getter, ie=None): + def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( - self.url_result(self._proto_relative_url(getter(m)), ie) + self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) for m in matches) return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -830,12 +886,28 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for embedded Viddler player + mobj = re.search( + r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos - mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or - re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) + mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or + re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or + re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage)) if mobj is not None: return OoyalaIE._build_url_result(mobj.group('ec')) + # Look for multiple Ooyala embeds on SBN network websites + mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) + if mobj is not None: + embeds = self._parse_json(mobj.group(1), video_id, fatal=False) + if embeds: + return _playlist_from_matches( + embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') + # Look for Aparat videos mobj = re.search(r'