HEADRequest,
is_html,
js_to_json,
+ KNOWN_EXTENSIONS,
+ merge_dicts,
+ mimetype2ext,
orderedSet,
sanitized_Request,
smuggle_url,
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .tvc import TVCIE
-from .sportbox import SportBoxEmbedIE
+from .sportbox import SportBoxIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
+from .tube8 import Tube8IE
+from .mofosex import MofosexEmbedIE
+from .spankwire import SpankwireIE
+from .youporn import YouPornIE
from .vimeo import VimeoIE
-from .dailymotion import (
- DailymotionIE,
- DailymotionCloudIE,
-)
+from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
-from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
-from .soundcloud import SoundcloudIE
+from .soundcloud import SoundcloudEmbedIE
from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
-from .openload import OpenloadIE
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .limelight import LimelightBaseIE
from .joj import JojIE
from .megaphone import MegaphoneIE
from .vzaar import VzaarIE
+from .channel9 import Channel9IE
+from .vshare import VShareIE
+from .mediasite import MediasiteIE
+from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
+from .vice import ViceIE
+from .xfileshare import XFileShareIE
+from .cloudflarestream import CloudflareStreamIE
+from .peertube import PeerTubeIE
+from .teachable import TeachableIE
+from .indavideo import IndavideoEmbedIE
+from .apa import APAIE
+from .foxnews import FoxNewsIE
+from .viqeo import ViqeoIE
+from .expressen import ExpressenIE
+from .zype import ZypeIE
+from .odnoklassniki import OdnoklassnikiIE
+from .kinja import KinjaEmbedIE
class GenericIE(InfoExtractor):
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
},
+ # RSS feed with enclosures and unsupported link URLs
+ {
+ 'url': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'info_dict': {
+ 'id': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+ 'title': 'Hello Internet',
+ },
+ 'playlist_mincount': 100,
+ },
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
{
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
},
},
{
- # https://github.com/rg3/youtube-dl/issues/2253
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'md5': '0ba9446db037002366bab3b3eb30c88c',
'info_dict': {
},
},
{
- # https://github.com/rg3/youtube-dl/issues/3541
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
}
},
# Multiple brightcove videos
- # https://github.com/rg3/youtube-dl/issues/2283
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
{
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
'info_dict': {
'ext': 'mp4',
'upload_date': '20150212',
'uploader': 'The National Archives UK',
- 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+ 'description': 'md5:8078af856dca76edc42910b61273dbbf',
'uploader_id': 'NationalArchives08',
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
},
},
# jwplayer rtmp
{
- 'url': 'http://www.suffolk.edu/sjc/',
+ 'url': 'http://www.suffolk.edu/sjc/live.php',
'info_dict': {
- 'id': 'sjclive',
+ 'id': 'live',
'ext': 'flv',
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
'uploader': 'www.suffolk.edu',
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
},
# Complex jwplayer
{
'id': 'videos',
'ext': 'mp4',
'title': 'king machine trailer 1',
+ 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
'skip_download': True,
}
},
+ {
+ # JWPlatform iframe
+ 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+ 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+ 'info_dict': {
+ 'id': 'O0c5JcKT',
+ 'ext': 'mp4',
+ 'upload_date': '20171122',
+ 'timestamp': 1511366290,
+ 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ },
+ {
+ # Video.js embed, multiple formats
+ 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+ 'info_dict': {
+ 'id': 'yygqldloqIk',
+ 'ext': 'mp4',
+ 'title': 'SolidWorks. Урок 6 Настройка чертежа',
+ 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+ 'upload_date': '20130314',
+ 'uploader': 'PROстое3D',
+ 'uploader_id': 'PROstoe3D',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Video.js embed, single format
+ 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+ 'info_dict': {
+ 'id': 'watch',
+ 'ext': 'mp4',
+ 'title': 'Step 1 - Good Foundation',
+ 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
'playlist_mincount': 5,
'info_dict': {
'id': 'aanslagen-kopenhagen',
- 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+ 'title': 'Aanslagen Kopenhagen',
}
},
# Zapiks embed
'title': '35871',
'timestamp': 1355743100,
'upload_date': '20121217',
- 'uploader_id': 'batchUser',
+ 'uploader_id': 'cplapp@learn360.com',
},
'add_ie': ['Kaltura'],
},
},
'add_ie': ['Kaltura'],
},
- # EaglePlatform embed (generic URL)
{
- 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
- # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ # Kaltura iframe embed, more sophisticated
+ 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
'info_dict': {
- 'id': '227304',
+ 'id': '1_9gzouybz',
'ext': 'mp4',
- 'title': 'Навальный вышел на свободу',
- 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 87,
- 'view_count': int,
- 'age_limit': 0,
+ 'title': 'lecture-05sep2017',
+ 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+ 'upload_date': '20170913',
+ 'uploader_id': 'eps2',
+ 'timestamp': 1505340777,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # meta twitter:player
+ 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
+ 'info_dict': {
+ 'id': '0_01b42zps',
+ 'ext': 'mp4',
+ 'title': 'Main Twerk (Video)',
+ 'upload_date': '20171208',
+ 'uploader_id': 'sebastian.salinas@thechive.com',
+ 'timestamp': 1512713057,
},
'params': {
'skip_download': True,
},
+ 'add_ie': ['Kaltura'],
},
# referrer protected EaglePlatform embed
{
'params': {
'skip_download': True,
},
+ 'skip': 'This video is unavailable.',
},
# Pladform embed
{
'duration': 694,
'age_limit': 0,
},
+ 'skip': 'HTTP Error 404: Not Found',
},
# Playwire embed
{
'id': '518726732',
'ext': 'mp4',
'title': 'Facebook Creates "On This Day" | Crunch Report',
+ 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+ 'timestamp': 1427237531,
+ 'uploader': 'Crunch Report',
+ 'upload_date': '20150324',
},
- },
- # SVT embed
- {
- 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
- 'info_dict': {
- 'id': '2900353',
- 'ext': 'flv',
- 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
- 'duration': 27,
- 'age_limit': 0,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
},
},
# Crooks and Liars embed
'upload_date': '20140107',
'timestamp': 1389118457,
},
+ 'skip': 'Invalid Page URL',
},
# NBC News embed
{
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
'md5': '1aa589c675898ae6d37a17913cf68d66',
'info_dict': {
- 'id': '701714499682',
+ 'id': 'x_dtl_oa_LettermanliftPR_160608',
'ext': 'mp4',
- 'title': 'PREVIEW: On Assignment: David Letterman',
+ 'title': 'David Letterman: A Preview',
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+ 'upload_date': '20160609',
+ 'timestamp': 1465431544,
+ 'uploader': 'NBCU-NEWS',
},
},
# UDN embed
# m3u8 download
'skip_download': True,
},
- },
- # Ooyala embed
- {
- 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
- 'info_dict': {
- 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
- 'ext': 'mp4',
- 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
- 'title': 'This is what separates the Excel masters from the wannabes',
- 'duration': 191.933,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- }
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
},
# Brightcove URL in single quotes
{
'timestamp': 1432570283,
},
},
- # Dailymotion Cloud video
- {
- 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
- 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
- 'info_dict': {
- 'id': 'x2uy8t3',
- 'ext': 'mp4',
- 'title': 'Sauvons les abeilles ! - Le débat',
- 'description': 'md5:d9082128b1c5277987825d684939ca26',
- 'thumbnail': r're:^https?://.*\.jpe?g$',
- 'timestamp': 1434970506,
- 'upload_date': '20150622',
- 'uploader': 'Public Sénat',
- 'uploader_id': 'xa9gza',
- }
- },
- # OnionStudios embed
+ # Kinja embed
{
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
'info_dict': {
- 'id': '2855',
+ 'id': '106351',
'ext': 'mp4',
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
'thumbnail': r're:^https?://.*\.jpe?g$',
- 'uploader': 'ClickHole',
- 'uploader_id': 'clickhole',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
}
},
# SnagFilms embed
},
'add_ie': ['BrightcoveLegacy'],
},
- # Nexx embed
- {
- 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
- 'info_dict': {
- 'id': '247746',
- 'ext': 'mp4',
- 'title': "Yesterday's Jam (OV)",
- 'description': 'md5:09bc0984723fed34e2581624a84e05f0',
- 'timestamp': 1492594816,
- 'upload_date': '20170419',
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- },
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
},
'add_ie': ['Kaltura'],
},
+ {
+ # multiple kaltura embeds, nsfw
+ 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+ 'info_dict': {
+ 'id': 'kamila-avec-video-jaime-sadomie',
+ 'title': "Kamila avec vídeo “J'aime sadomie”",
+ },
+ 'playlist_count': 8,
+ },
{
# Non-standard Vimeo embed
'url': 'https://openclassrooms.com/courses/understanding-the-web',
'title': 'Rescue Kit 14 Free Edition - Getting started',
},
'playlist_count': 4,
- }
+ },
+ {
+ # vshare embed
+ 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+ 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+ 'info_dict': {
+ 'id': '0f64ce6',
+ 'title': 'vl14062007715967',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+ 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+ 'info_dict': {
+ 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+ 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+ 'timestamp': 1474354800,
+ 'upload_date': '20160920',
+ }
+ },
+ {
+ 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
+ 'info_dict': {
+ 'id': '1731611',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
+ 'description': 'md5:eb5f23826a027ba95277d105f248b825',
+ 'timestamp': 1516100691,
+ 'upload_date': '20180116',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [SpringboardPlatformIE.ie_key()],
+ },
+ {
+ 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+ 'info_dict': {
+ 'id': 'uPDB5I9wfp8',
+ 'ext': 'webm',
+ 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+ 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+ 'upload_date': '20160219',
+ 'uploader': 'Pocoyo - Português (BR)',
+ 'uploader_id': 'PocoyoBrazil',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+ 'info_dict': {
+ 'id': 'vMDE4NzI1Mjgt690b',
+ 'ext': 'mp4',
+ 'title': 'Котята',
+ },
+ 'add_ie': [YapFilesIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # CloudflareStream embed
+ 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # PeerTube embed
+ 'url': 'https://joinpeertube.org/fr/home/',
+ 'info_dict': {
+ 'id': 'home',
+ 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # Indavideo embed
+ 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+ 'info_dict': {
+ 'id': '1693903',
+ 'ext': 'mp4',
+ 'title': 'Így kell otthon hamburgert sütni',
+ 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+ 'timestamp': 1426330212,
+ 'upload_date': '20150314',
+ 'uploader': 'StreetKitchen',
+ 'uploader_id': '546363',
+ },
+ 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # APA embed via JWPlatform embed
+ 'url': 'http://www.vol.at/blue-man-group/5593454',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+ 'md5': 'b68d276de422ab07ee1d49388103f457',
+ 'info_dict': {
+ 'id': '83645793',
+ 'title': 'Lock up and get excited',
+ 'ext': 'mp4'
+ },
+ 'skip': 'TODO: fix nested playlists processing in tests',
+ },
+ {
+ # Viqeo embeds
+ 'url': 'https://viqeo.tv/',
+ 'info_dict': {
+ 'id': 'viqeo',
+ 'title': 'All-new video platform',
+ },
+ 'playlist_count': 6,
+ },
+ {
+ # Squarespace video embed, 2019-08-28
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Zype embed
+ 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'upload_date': '20170909',
+ 'timestamp': 1504915200,
+ },
+ 'add_ie': [ZypeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # videojs embed
+ 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
+ 'info_dict': {
+ 'id': 'shell',
+ 'ext': 'mp4',
+ 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
+ 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest'],
+ },
+ {
+ # DailyMotion embed with DM.player
+ 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
+ 'info_dict': {
+ 'id': 'k6aKkGHd9FJs4mtJN39',
+ 'ext': 'mp4',
+ 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
+ 'description': 'This video is private.',
+ 'uploader_id': 'x1jf30l',
+ 'uploader': 'beIN SPORTS USA',
+ 'upload_date': '20190528',
+ 'timestamp': 1559062971,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
entries = []
for it in doc.findall('./channel/item'):
- next_url = xpath_text(it, 'link', fatal=False)
+ next_url = None
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
if not next_url:
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
+ next_url = xpath_text(it, 'link', fatal=False)
if not next_url:
continue
def _real_extract(self, url):
if url.startswith('//'):
- return {
- '_type': 'url',
- 'url': self.http_scheme() + url,
- }
+ return self.url_result(self.http_scheme() + url)
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
- if '/' in url:
+ if re.match(r'^[^\s/]+\.[^\s/]+/', url):
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
if head_response is not False:
# Check for redirect
- new_url = compat_str(head_response.geturl())
+ new_url = head_response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
- return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
+ return self.playlist_result(
+ self._parse_xspf(
+ doc, video_id, xspf_url=url,
+ xspf_base_url=full_response.geturl()),
+ video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
- doc, video_id,
- mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
+ doc,
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
return camtasia_res
# Sometimes embedded video player is hidden behind percent encoding
- # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+ # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
# Unescaping the whole page allows to handle those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage)
+ # Unescape squarespace embeds to be detected by generic extractor,
+ # see https://github.com/ytdl-org/youtube-dl/issues/21294
+ webpage = re.sub(
+ r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
+ lambda x: unescapeHTML(x.group(0)), webpage)
+
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
# And then there are the jokers who advertise that they use RTA,
# but actually don't.
AGE_LIMIT_MARKERS = [
- r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
]
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
age_limit = 18
# Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
if bc_urls:
- return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
+ return self.playlist_from_matches(
+ bc_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'referrer': url}),
+ ie='BrightcoveNew')
# Look for Nexx embeds
nexx_urls = NexxIE._extract_urls(webpage)
if tp_urls:
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
- # Look for Vessel embeds
- vessel_urls = VesselIE._extract_urls(webpage)
- if vessel_urls:
- return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
-
# Look for embedded rtl.nl player
matches = re.findall(
- r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
+ r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
webpage)
if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
return self.playlist_from_matches(
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+ # Look for Teachable embeds, must be before Wistia
+ teachable_url = TeachableIE._extract_url(webpage, url)
+ if teachable_url:
+ return self.url_result(teachable_url)
+
# Look for embedded Wistia player
- wistia_url = WistiaIE._extract_url(webpage)
- if wistia_url:
- return {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(wistia_url),
- 'ie_key': WistiaIE.ie_key(),
- 'uploader': video_uploader,
- }
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if wistia_urls:
+ playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ for entry in playlist['entries']:
+ entry.update({
+ '_type': 'url_transparent',
+ 'uploader': video_uploader,
+ })
+ return playlist
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
- re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
- re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
embed_token = self._search_regex(
r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
- # Look for embedded NovaMov-based player
- mobj = re.search(
- r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
- (?P<url>http://(?:(?:embed|www)\.)?
- (?:novamov\.com|
- nowvideo\.(?:ch|sx|eu|at|ag|co)|
- videoweed\.(?:es|com)|
- movshare\.(?:net|sx|ag)|
- divxstage\.(?:eu|net|ch|co|at|ag))
- /embed\.php.+?)\1''', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
# Look for embedded Facebook player
facebook_urls = FacebookIE._extract_urls(webpage)
if facebook_urls:
return self.url_result(mobj.group('url'), 'VK')
# Look for embedded Odnoklassniki player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Odnoklassniki')
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
return self.url_result(tvc_url, 'TVC')
# Look for embedded SportBox player
- sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
+ sportbox_urls = SportBoxIE._extract_urls(webpage)
if sportbox_urls:
- return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
+ return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if redtube_urls:
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
+ # Look for embedded Tube8 player
+ tube8_urls = Tube8IE._extract_urls(webpage)
+ if tube8_urls:
+ return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+
+ # Look for embedded Mofosex player
+ mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
+ if mofosex_urls:
+ return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
+
+ # Look for embedded Spankwire player
+ spankwire_urls = SpankwireIE._extract_urls(webpage)
+ if spankwire_urls:
+ return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
+
+ # Look for embedded YouPorn player
+ youporn_urls = YouPornIE._extract_urls(webpage)
+ if youporn_urls:
+ return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
+
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
return self.url_result(myvi_url)
# Look for embedded soundcloud player
- soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage)
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
- return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+ kaltura_urls = KalturaIE._extract_urls(webpage)
+ if kaltura_urls:
+ return self.playlist_from_matches(
+ kaltura_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'source_url': url}),
+ ie=KalturaIE.ie_key())
# Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
# Look for UDN embeds
mobj = re.search(
- r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+ r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP')
- # Look for Dailymotion Cloud videos
- dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
- if dmcloud_url:
- return self.url_result(dmcloud_url, 'DailymotionCloud')
+ # Look for Kinja embeds
+ kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
+ if kinja_embed_urls:
+ return self.playlist_from_matches(
+ kinja_embed_urls, video_id, video_title)
# Look for OnionStudios embeds
onionstudios_url = OnionStudiosIE._extract_url(webpage)
return self.url_result(viewlift_url)
# Look for JWPlatform embeds
- jwplatform_url = JWPlatformIE._extract_url(webpage)
- if jwplatform_url:
- return self.url_result(jwplatform_url, 'JWPlatform')
+ jwplatform_urls = JWPlatformIE._extract_urls(webpage)
+ if jwplatform_urls:
+ return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
# Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage)
# Look for VODPlatform embeds
mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
webpage)
if mobj is not None:
return self.url_result(
# Look for Mangomolo embeds
mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
+ (?:
+ admin\.mangomolo\.com/analytics/index\.php/customers/embed|
+ player\.mangomolo\.com/v1
+ )/
(?:
video\?.*?\bid=(?P<video_id>\d+)|
- index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
).+?)\1''', webpage)
if mobj is not None:
info = {
return self.playlist_from_matches(
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
- # Look for Openload embeds
- openload_urls = OpenloadIE._extract_urls(webpage)
- if openload_urls:
- return self.playlist_from_matches(
- openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
-
# Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls:
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(webpage)
+ mediaset_urls = MediasetIE._extract_urls(self, webpage)
if mediaset_urls:
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
return self.playlist_from_matches(
vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
- def merge_dicts(dict1, dict2):
- merged = {}
- for k, v in dict1.items():
- if v is not None:
- merged[k] = v
- for k, v in dict2.items():
- if v is None:
- continue
- if (k not in merged or
- (isinstance(v, compat_str) and v and
- isinstance(merged[k], compat_str) and
- not merged[k])):
- merged[k] = v
- return merged
+ channel9_urls = Channel9IE._extract_urls(webpage)
+ if channel9_urls:
+ return self.playlist_from_matches(
+ channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
+
+ vshare_urls = VShareIE._extract_urls(webpage)
+ if vshare_urls:
+ return self.playlist_from_matches(
+ vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
+ # Look for Mediasite embeds
+ mediasite_urls = MediasiteIE._extract_urls(webpage)
+ if mediasite_urls:
+ entries = [
+ self.url_result(smuggle_url(
+ compat_urlparse.urljoin(url, mediasite_url),
+ {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
+ for mediasite_url in mediasite_urls]
+ return self.playlist_result(entries, video_id, video_title)
+
+ springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
+ if springboardplatform_urls:
+ return self.playlist_from_matches(
+ springboardplatform_urls, video_id, video_title,
+ ie=SpringboardPlatformIE.ie_key())
+
+ yapfiles_urls = YapFilesIE._extract_urls(webpage)
+ if yapfiles_urls:
+ return self.playlist_from_matches(
+ yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
+ vice_urls = ViceIE._extract_urls(webpage)
+ if vice_urls:
+ return self.playlist_from_matches(
+ vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+
+ xfileshare_urls = XFileShareIE._extract_urls(webpage)
+ if xfileshare_urls:
+ return self.playlist_from_matches(
+ xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+
+ cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
+ if cloudflarestream_urls:
+ return self.playlist_from_matches(
+ cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
+
+ peertube_urls = PeerTubeIE._extract_urls(webpage, url)
+ if peertube_urls:
+ return self.playlist_from_matches(
+ peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
+
+ indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
+ if indavideo_urls:
+ return self.playlist_from_matches(
+ indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
+
+ apa_urls = APAIE._extract_urls(webpage)
+ if apa_urls:
+ return self.playlist_from_matches(
+ apa_urls, video_id, video_title, ie=APAIE.ie_key())
+
+ foxnews_urls = FoxNewsIE._extract_urls(webpage)
+ if foxnews_urls:
+ return self.playlist_from_matches(
+ foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
+ sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
+ r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+ webpage)]
+ if sharevideos_urls:
+ return self.playlist_from_matches(
+ sharevideos_urls, video_id, video_title)
+
+ viqeo_urls = ViqeoIE._extract_urls(webpage)
+ if viqeo_urls:
+ return self.playlist_from_matches(
+ viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
+
+ expressen_urls = ExpressenIE._extract_urls(webpage)
+ if expressen_urls:
+ return self.playlist_from_matches(
+ expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
+
+ zype_urls = ZypeIE._extract_urls(webpage)
+ if zype_urls:
+ return self.playlist_from_matches(
+ zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
if jwplayer_data:
- info = self._parse_jwplayer_data(
- jwplayer_data, video_id, require_title=False, base_url=url)
- return merge_dicts(info, info_dict)
+ try:
+ info = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=url)
+ return merge_dicts(info, info_dict)
+ except ExtractorError:
+ # See https://github.com/ytdl-org/youtube-dl/pull/16735
+ pass
+
+ # Video.js embed
+ mobj = re.search(
+ r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ webpage)
+ if mobj is not None:
+ sources = self._parse_json(
+ mobj.group(1), video_id, transform_source=js_to_json,
+ fatal=False) or []
+ if not isinstance(sources, list):
+ sources = [sources]
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ src = compat_urlparse.urljoin(url, src)
+ src_type = source.get('type')
+ if isinstance(src_type, compat_str):
+ src_type = src_type.lower()
+ ext = determine_ext(src).lower()
+ if src_type == 'video/youtube':
+ return self.url_result(src, YoutubeIE.ie_key())
+ if src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ 'ext': (mimetype2ext(src_type)
+ or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+ })
+ if formats:
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
# be supported by youtube-dl thus this is checked the very last (see
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
- if embed_url:
+ if embed_url and embed_url != url:
return self.url_result(embed_url)
if not found: