from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
-from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
-from .soundcloud import SoundcloudIE
+from .soundcloud import SoundcloudEmbedIE
from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
-from .openload import (
- OpenloadIE,
- VerystreamIE,
-)
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .limelight import LimelightBaseIE
from .viqeo import ViqeoIE
from .expressen import ExpressenIE
from .zype import ZypeIE
+from .odnoklassniki import OdnoklassnikiIE
+from .kinja import KinjaEmbedIE
class GenericIE(InfoExtractor):
'timestamp': 1432570283,
},
},
- # OnionStudios embed
+ # Kinja embed
{
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
'info_dict': {
- 'id': '2855',
+ 'id': '106351',
'ext': 'mp4',
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
'thumbnail': r're:^https?://.*\.jpe?g$',
- 'uploader': 'ClickHole',
- 'uploader_id': 'clickhole',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
}
},
# SnagFilms embed
},
'playlist_count': 6,
},
+ {
+ # Squarespace video embed, 2019-08-28
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
{
# Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
'ext': 'mp4',
'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'upload_date': '20170909',
+ 'timestamp': 1504915200,
},
'add_ie': [ZypeIE.ie_key()],
'params': {
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
- if '/' in url:
+ if re.match(r'^[^\s/]+\.[^\s/]+/', url):
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
if head_response is not False:
# Check for redirect
- new_url = compat_str(head_response.geturl())
+ new_url = head_response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
- xspf_base_url=compat_str(full_response.geturl())),
+ xspf_base_url=full_response.geturl()),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc,
- mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
# Unescaping the whole page allows to handle those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage)
+ # Unescape Squarespace embeds so that the generic extractor can detect them,
+ # see https://github.com/ytdl-org/youtube-dl/issues/21294
+ webpage = re.sub(
+ r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
+ lambda x: unescapeHTML(x.group(0)), webpage)
+
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
if tp_urls:
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
- # Look for Vessel embeds
- vessel_urls = VesselIE._extract_urls(webpage)
- if vessel_urls:
- return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
-
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
return self.playlist_from_matches(
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+ # Look for Teachable embeds, must be before Wistia
+ teachable_url = TeachableIE._extract_url(webpage, url)
+ if teachable_url:
+ return self.url_result(teachable_url)
+
# Look for embedded Wistia player
- wistia_url = WistiaIE._extract_url(webpage)
- if wistia_url:
- return {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(wistia_url),
- 'ie_key': WistiaIE.ie_key(),
- 'uploader': video_uploader,
- }
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if wistia_urls:
+ playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ for entry in playlist['entries']:
+ entry.update({
+ '_type': 'url_transparent',
+ 'uploader': video_uploader,
+ })
+ return playlist
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
return self.url_result(mobj.group('url'), 'VK')
# Look for embedded Odnoklassniki player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Odnoklassniki')
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
return self.url_result(myvi_url)
# Look for embedded soundcloud player
- soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage)
if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP')
+ # Look for Kinja embeds
+ kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
+ if kinja_embed_urls:
+ return self.playlist_from_matches(
+ kinja_embed_urls, video_id, video_title)
+
# Look for OnionStudios embeds
onionstudios_url = OnionStudiosIE._extract_url(webpage)
if onionstudios_url:
# Look for VODPlatform embeds
mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
webpage)
if mobj is not None:
return self.url_result(
# Look for Mangomolo embeds
mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
+ (?:
+ admin\.mangomolo\.com/analytics/index\.php/customers/embed|
+ player\.mangomolo\.com/v1
+ )/
(?:
video\?.*?\bid=(?P<video_id>\d+)|
- index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
).+?)\1''', webpage)
if mobj is not None:
info = {
return self.playlist_from_matches(
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
- # Look for Openload embeds
- openload_urls = OpenloadIE._extract_urls(webpage)
- if openload_urls:
- return self.playlist_from_matches(
- openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
-
- # Look for Verystream embeds
- verystream_urls = VerystreamIE._extract_urls(webpage)
- if verystream_urls:
- return self.playlist_from_matches(
- verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
-
# Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls:
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
- teachable_url = TeachableIE._extract_url(webpage, url)
- if teachable_url:
- return self.url_result(teachable_url)
-
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls:
return self.playlist_from_matches(