X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/3712eb2135b6beeff3820f65bdfc6173fd1dbbab..ccd42797f06db6131ae8e119ac7529ce4ac7aaff:/youtube_dl/extractor/generic.py?ds=sidebyside diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 067de28..d1725d9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -89,7 +89,10 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE -from .openload import OpenloadIE +from .openload import ( + OpenloadIE, + VerystreamIE, +) from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE @@ -430,7 +433,7 @@ class GenericIE(InfoExtractor): }, }, { - # https://github.com/rg3/youtube-dl/issues/2253 + # https://github.com/ytdl-org/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', 'md5': '0ba9446db037002366bab3b3eb30c88c', 'info_dict': { @@ -455,7 +458,7 @@ class GenericIE(InfoExtractor): }, }, { - # https://github.com/rg3/youtube-dl/issues/3541 + # https://github.com/ytdl-org/youtube-dl/issues/3541 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', 'info_dict': { @@ -919,7 +922,7 @@ class GenericIE(InfoExtractor): } }, # Multiple brightcove videos - # https://github.com/rg3/youtube-dl/issues/2283 + # https://github.com/ytdl-org/youtube-dl/issues/2283 { 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', 'info_dict': { @@ -2072,6 +2075,22 @@ class GenericIE(InfoExtractor): }, 'playlist_count': 6, }, + { + # Squarespace video embed, 2019-08-28 + 'url': 'http://ootboxford.com', + 'info_dict': { + 'id': 'Tc7b_JGdZfw', + 'title': 'Out of the Blue, at Childish Things 10', + 'ext': 'mp4', + 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f', + 'uploader_id': 'helendouglashouse', + 'uploader': 'Helen & Douglas House', + 'upload_date': '20140328', + }, + 'params': { + 'skip_download': True, + }, + }, { # Zype embed 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', @@ -2101,6 +2120,23 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to download MPD manifest'], }, + { + # DailyMotion embed with DM.player + 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804', + 'info_dict': { + 'id': 'k6aKkGHd9FJs4mtJN39', + 'ext': 'mp4', + 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final', + 'description': 'This video is private.', + 'uploader_id': 'x1jf30l', + 'uploader': 'beIN SPORTS USA', + 'upload_date': '20190528', + 'timestamp': 1559062971, + }, + 'params': { + 'skip_download': True, + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2206,7 +2242,7 @@ class GenericIE(InfoExtractor): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if '/' in url: + if re.match(r'^[^\s/]+\.[^\s/]+/', url): self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) elif default_search != 'fixup_error': @@ -2371,10 +2407,16 @@ class GenericIE(InfoExtractor): return camtasia_res # Sometimes embedded video player is hidden behind percent encoding - # (e.g. https://github.com/rg3/youtube-dl/issues/2448) + # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448) # Unescaping the whole page allows to handle those cases in a generic way webpage = compat_urllib_parse_unquote(webpage) + # Unescape squarespace embeds to be detected by generic extractor, + # see https://github.com/ytdl-org/youtube-dl/issues/21294 + webpage = re.sub( + r'