X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/00368b4c3a5d4e909e1b7ecfc4030bf28da020f3..4121346dfbb2b660ded98e98069857c7a50e381f:/youtube_dl/extractor/generic.py diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7d0edf0..a495ee1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -22,6 +22,9 @@ from ..utils import ( HEADRequest, is_html, js_to_json, + KNOWN_EXTENSIONS, + merge_dicts, + mimetype2ext, orderedSet, sanitized_Request, smuggle_url, @@ -44,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .tvc import TVCIE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE @@ -56,11 +59,9 @@ from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE +from .tube8 import Tube8IE from .vimeo import VimeoIE -from .dailymotion import ( - DailymotionIE, - DailymotionCloudIE, -) +from .dailymotion import DailymotionIE from .dailymail import DailyMailIE from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE @@ -76,11 +77,10 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE -from .vessel import VesselIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE -from .soundcloud import SoundcloudIE +from .soundcloud import SoundcloudEmbedIE from .tunein import TuneInBaseIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE @@ -88,7 +88,6 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE -from .openload import OpenloadIE from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE @@ -99,6 +98,24 @@ from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE +from .channel9 import Channel9IE +from .vshare import VShareIE +from .mediasite import MediasiteIE +from .springboardplatform import SpringboardPlatformIE +from .yapfiles import YapFilesIE +from .vice import ViceIE +from .xfileshare import XFileShareIE +from .cloudflarestream import CloudflareStreamIE +from .peertube import PeerTubeIE +from .teachable import TeachableIE +from .indavideo import IndavideoEmbedIE +from .apa import APAIE +from .foxnews import FoxNewsIE +from .viqeo import ViqeoIE +from .expressen import ExpressenIE +from .zype import ZypeIE +from .odnoklassniki import OdnoklassnikiIE +from .kinja import KinjaEmbedIE class GenericIE(InfoExtractor): @@ -183,6 +200,16 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # RSS feed with enclosures and unsupported link URLs + { + 'url': 'http://www.hellointernet.fm/podcast?format=rss', + 'info_dict': { + 'id': 'http://www.hellointernet.fm/podcast?format=rss', + 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.', + 'title': 'Hello Internet', + }, + 'playlist_mincount': 100, + }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', @@ -403,7 +430,7 @@ class GenericIE(InfoExtractor): }, }, { - # https://github.com/rg3/youtube-dl/issues/2253 + # https://github.com/ytdl-org/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', 'md5': '0ba9446db037002366bab3b3eb30c88c', 'info_dict': { @@ -428,7 +455,7 @@ class GenericIE(InfoExtractor): }, }, { - # https://github.com/rg3/youtube-dl/issues/3541 + # https://github.com/ytdl-org/youtube-dl/issues/3541 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', 'info_dict': { @@ -892,7 +919,7 @@ class GenericIE(InfoExtractor): } }, # Multiple brightcove videos - # https://github.com/rg3/youtube-dl/issues/2283 + # https://github.com/ytdl-org/youtube-dl/issues/2283 { 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', 'info_dict': { @@ -1088,23 +1115,24 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20150212', 'uploader': 'The National Archives UK', - 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', + 'description': 'md5:8078af856dca76edc42910b61273dbbf', 'uploader_id': 'NationalArchives08', 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', }, }, # jwplayer rtmp { - 'url': 'http://www.suffolk.edu/sjc/', + 'url': 'http://www.suffolk.edu/sjc/live.php', 'info_dict': { - 'id': 'sjclive', + 'id': 'live', 'ext': 'flv', 'title': 'Massachusetts Supreme Judicial Court Oral Arguments', 'uploader': 'www.suffolk.edu', }, 'params': { 'skip_download': True, - } + }, + 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/', }, # Complex jwplayer { @@ -1113,6 +1141,7 @@ class GenericIE(InfoExtractor): 'id': 'videos', 'ext': 'mp4', 'title': 'king machine trailer 1', + 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.', 'thumbnail': r're:^https?://.*\.jpg$', }, }, @@ -1130,13 +1159,55 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # JWPlatform iframe + 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/', + 'md5': 'ca00a040364b5b439230e7ebfd02c4e9', + 'info_dict': { + 'id': 'O0c5JcKT', + 'ext': 'mp4', + 'upload_date': '20171122', + 'timestamp': 1511366290, + 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone', + }, + 'add_ie': [JWPlatformIE.ie_key()], + }, + { + # Video.js embed, multiple formats + 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', + 'info_dict': { + 'id': 'yygqldloqIk', + 'ext': 'mp4', + 'title': 'SolidWorks. Урок 6 Настройка чертежа', + 'description': 'md5:baf95267792646afdbf030e4d06b2ab3', + 'upload_date': '20130314', + 'uploader': 'PROстое3D', + 'uploader_id': 'PROstoe3D', + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Video.js embed, single format + 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=', + 'info_dict': { + 'id': 'watch', + 'ext': 'mp4', + 'title': 'Step 1 - Good Foundation', + 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4', + }, + 'params': { + 'skip_download': True, + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', 'playlist_mincount': 5, 'info_dict': { 'id': 'aanslagen-kopenhagen', - 'title': 'Aanslagen Kopenhagen | RTL Nieuws', + 'title': 'Aanslagen Kopenhagen', } }, # Zapiks embed @@ -1169,7 +1240,7 @@ class GenericIE(InfoExtractor): 'title': '35871', 'timestamp': 1355743100, 'upload_date': '20121217', - 'uploader_id': 'batchUser', + 'uploader_id': 'cplapp@learn360.com', }, 'add_ie': ['Kaltura'], }, @@ -1220,23 +1291,38 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # EaglePlatform embed (generic URL) { - 'url': 'http://lenta.ru/news/2015/03/06/navalny/', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used + # Kaltura iframe embed, more sophisticated + 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html', 'info_dict': { - 'id': '227304', + 'id': '1_9gzouybz', 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, + 'title': 'lecture-05sep2017', + 'description': 'md5:40f347d91fd4ba047e511c5321064b49', + 'upload_date': '20170913', + 'uploader_id': 'eps2', + 'timestamp': 1505340777, }, 'params': { 'skip_download': True, }, + 'add_ie': ['Kaltura'], + }, + { + # meta twitter:player + 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/', + 'info_dict': { + 'id': '0_01b42zps', + 'ext': 'mp4', + 'title': 'Main Twerk (Video)', + 'upload_date': '20171208', + 'uploader_id': 'sebastian.salinas@thechive.com', + 'timestamp': 1512713057, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Kaltura'], }, # referrer protected EaglePlatform embed { @@ -1268,6 +1354,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video is unavailable.', }, # Pladform embed { @@ -1281,6 +1368,7 @@ class GenericIE(InfoExtractor): 'duration': 694, 'age_limit': 0, }, + 'skip': 'HTTP Error 404: Not Found', }, # Playwire embed { @@ -1301,17 +1389,14 @@ class GenericIE(InfoExtractor): 'id': '518726732', 'ext': 'mp4', 'title': 'Facebook Creates "On This Day" | Crunch Report', + 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild', + 'timestamp': 1427237531, + 'uploader': 'Crunch Report', + 'upload_date': '20150324', }, - }, - # SVT embed - { - 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun', - 'info_dict': { - 'id': '2900353', - 'ext': 'flv', - 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', - 'duration': 27, - 'age_limit': 0, + 'params': { + # m3u8 download + 'skip_download': True, }, }, # Crooks and Liars embed @@ -1352,16 +1437,20 @@ class GenericIE(InfoExtractor): 'upload_date': '20140107', 'timestamp': 1389118457, }, + 'skip': 'Invalid Page URL', }, # NBC News embed { 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html', 'md5': '1aa589c675898ae6d37a17913cf68d66', 'info_dict': { - 'id': '701714499682', + 'id': 'x_dtl_oa_LettermanliftPR_160608', 'ext': 'mp4', - 'title': 'PREVIEW: On Assignment: David Letterman', + 'title': 'David Letterman: A Preview', 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.', + 'upload_date': '20160609', + 'timestamp': 1465431544, + 'uploader': 'NBCU-NEWS', }, }, # UDN embed @@ -1378,21 +1467,7 @@ class GenericIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - }, - # Ooyala embed - { - 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', - 'info_dict': { - 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs', - 'ext': 'mp4', - 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.', - 'title': 'This is what separates the Excel masters from the wannabes', - 'duration': 191.933, - }, - 'params': { - # m3u8 downloads - 'skip_download': True, - } + 'expected_warnings': ['Failed to parse JSON Expecting value'], }, # Brightcove URL in single quotes { @@ -1409,32 +1484,18 @@ class GenericIE(InfoExtractor): 'timestamp': 1432570283, }, }, - # Dailymotion Cloud video - { - 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', - 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38', - 'info_dict': { - 'id': 'x2uy8t3', - 'ext': 'mp4', - 'title': 'Sauvons les abeilles ! - Le débat', - 'description': 'md5:d9082128b1c5277987825d684939ca26', - 'thumbnail': r're:^https?://.*\.jpe?g$', - 'timestamp': 1434970506, - 'upload_date': '20150622', - 'uploader': 'Public Sénat', - 'uploader_id': 'xa9gza', - } - }, - # OnionStudios embed + # Kinja embed { 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', 'info_dict': { - 'id': '2855', + 'id': '106351', 'ext': 'mp4', 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', + 'description': 'Migrated from OnionStudios', 'thumbnail': r're:^https?://.*\.jpe?g$', - 'uploader': 'ClickHole', - 'uploader_id': 'clickhole', + 'uploader': 'clickhole', + 'upload_date': '20150527', + 'timestamp': 1432744860, } }, # SnagFilms embed @@ -1581,22 +1642,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['BrightcoveLegacy'], }, - # Nexx embed - { - 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', - 'info_dict': { - 'id': '247746', - 'ext': 'mp4', - 'title': "Yesterday's Jam (OV)", - 'description': 'md5:09bc0984723fed34e2581624a84e05f0', - 'timestamp': 1492594816, - 'upload_date': '20170419', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, # Facebook