X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/532a08904ffbacc5e5ccf99edb660c5f37ddb213..421b7b220ea958384617bd7d339f188bf601280e:/youtube_dl/extractor/generic.py diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c108d4a..2a9c3e2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -10,6 +10,7 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( compat_etree_fromstring, + compat_str, compat_urllib_parse_unquote, compat_urlparse, compat_xml_parse_error, @@ -21,6 +22,8 @@ from ..utils import ( HEADRequest, is_html, js_to_json, + KNOWN_EXTENSIONS, + mimetype2ext, orderedSet, sanitized_Request, smuggle_url, @@ -35,6 +38,10 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@ -56,6 +63,7 @@ from .dailymotion import ( DailymotionIE, DailymotionCloudIE, ) +from .dailymail import DailyMailIE from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE from .mtv import MTVServicesEmbeddedIE @@ -90,6 +98,10 @@ from .anvato import AnvatoIE from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE from .mediaset import MediasetIE +from .joj import JojIE +from .megaphone import MegaphoneIE +from .vzaar import VzaarIE +from .channel9 import Channel9IE class GenericIE(InfoExtractor): @@ -567,6 +579,19 @@ class GenericIE(InfoExtractor): }, 'skip': 'movie expired', }, + # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js + { + 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', + 'info_dict': { + 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', + 'ext': 'mp4', + 'title': 'Steampunk Fest Comes to Honesdale', + 'duration': 43.276, + }, + 'params': { + 'skip_download': True, + } + }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -758,6 +783,20 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Dailymotion'], }, + # DailyMail embed + { + 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot', + 'info_dict': { + 'id': '1495629', + 'ext': 'mp4', + 'title': 'Care worker punches elderly dementia patient in head 11 times', + 'description': 'md5:3a743dee84e57e48ec68bf67113199a5', + }, + 'add_ie': ['DailyMail'], + 'params': { + 'skip_download': True, + }, + }, # YouTube embed { 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', @@ -1052,7 +1091,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20150212', 'uploader': 'The National Archives UK', - 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', + 'description': 'md5:8078af856dca76edc42910b61273dbbf', 'uploader_id': 'NationalArchives08', 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', }, @@ -1068,7 +1107,8 @@ class GenericIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, + 'skip': 'does not contain a video anymore', }, # Complex jwplayer { @@ -1077,6 +1117,7 @@ class GenericIE(InfoExtractor): 'id': 'videos', 'ext': 'mp4', 'title': 'king machine trailer 1', + 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.', 'thumbnail': r're:^https?://.*\.jpg$', }, }, @@ -1094,13 +1135,42 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # Video.js embed, multiple formats + 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', + 'info_dict': { + 'id': 'yygqldloqIk', + 'ext': 'mp4', + 'title': 'SolidWorks. Урок 6 Настройка чертежа', + 'description': 'md5:baf95267792646afdbf030e4d06b2ab3', + 'upload_date': '20130314', + 'uploader': 'PROстое3D', + 'uploader_id': 'PROstoe3D', + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Video.js embed, single format + 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=', + 'info_dict': { + 'id': 'watch', + 'ext': 'mp4', + 'title': 'Step 1 - Good Foundation', + 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4', + }, + 'params': { + 'skip_download': True, + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', 'playlist_mincount': 5, 'info_dict': { 'id': 'aanslagen-kopenhagen', - 'title': 'Aanslagen Kopenhagen | RTL Nieuws', + 'title': 'Aanslagen Kopenhagen', } }, # Zapiks embed @@ -1184,7 +1254,7 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # Eagle.Platform embed (generic URL) + # EaglePlatform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -1198,8 +1268,26 @@ class GenericIE(InfoExtractor): 'view_count': int, 'age_limit': 0, }, + 'params': { + 'skip_download': True, + }, + }, + # referrer protected EaglePlatform embed + { + 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', + 'info_dict': { + 'id': '582306', + 'ext': 'mp4', + 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3382, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, }, - # ClipYou (Eagle.Platform) embed (custom URL) + # ClipYou (EaglePlatform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -1211,6 +1299,10 @@ class GenericIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'params': { + 'skip_download': True, + }, + 'skip': 'This video is unavailable.', }, # Pladform embed { @@ -1224,6 +1316,7 @@ class GenericIE(InfoExtractor): 'duration': 694, 'age_limit': 0, }, + 'skip': 'HTTP Error 404: Not Found', }, # Playwire embed { @@ -1244,6 +1337,14 @@ class GenericIE(InfoExtractor): 'id': '518726732', 'ext': 'mp4', 'title': 'Facebook Creates "On This Day" | Crunch Report', + 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild', + 'timestamp': 1427237531, + 'uploader': 'Crunch Report', + 'upload_date': '20150324', + }, + 'params': { + # m3u8 download + 'skip_download': True, }, }, # SVT embed @@ -1295,16 +1396,20 @@ class GenericIE(InfoExtractor): 'upload_date': '20140107', 'timestamp': 1389118457, }, + 'skip': 'Invalid Page URL', }, # NBC News embed { 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html', 'md5': '1aa589c675898ae6d37a17913cf68d66', 'info_dict': { - 'id': '701714499682', + 'id': 'x_dtl_oa_LettermanliftPR_160608', 'ext': 'mp4', - 'title': 'PREVIEW: On Assignment: David Letterman', + 'title': 'David Letterman: A Preview', 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.', + 'upload_date': '20160609', + 'timestamp': 1465431544, + 'uploader': 'NBCU-NEWS', }, }, # UDN embed @@ -1321,6 +1426,7 @@ class GenericIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Failed to parse JSON Expecting value'], }, # Ooyala embed { @@ -1328,7 +1434,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs', 'ext': 'mp4', - 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.', + 'description': 'Index/Match versus VLOOKUP.', 'title': 'This is what separates the Excel masters from the wannabes', 'duration': 191.933, }, @@ -1366,7 +1472,8 @@ class GenericIE(InfoExtractor): 'upload_date': '20150622', 'uploader': 'Public Sénat', 'uploader_id': 'xa9gza', - } + }, + 'skip': 'File not found.', }, # OnionStudios embed { @@ -1462,14 +1569,27 @@ class GenericIE(InfoExtractor): # LiveLeak embed { 'url': 'http://www.wykop.pl/link/3088787/', - 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', + 'md5': '7619da8c820e835bef21a1efa2a0fc71', 'info_dict': { 'id': '874_1459135191', 'ext': 'mp4', 'title': 'Man shows poor quality of new apartment building', 'description': 'The wall is like a sand pile.', 'uploader': 'Lake8737', - } + }, + 'add_ie': [LiveLeakIE.ie_key()], + }, + # Another LiveLeak embed pattern (#13336) + { + 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/', + 'info_dict': { + 'id': '2eb_1496309988', + 'ext': 'mp4', + 'title': 'Thief robs place where everyone was armed', + 'description': 'md5:694d73ee79e535953cf2488562288eee', + 'uploader': 'brazilwtf', + }, + 'add_ie': [LiveLeakIE.ie_key()], }, # Duplicated embedded video URLs { @@ -1521,6 +1641,21 @@ class GenericIE(InfoExtractor): 'title': 'Facebook video #599637780109885', }, }, + # Facebook