X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/fe979149c83b5a935f7d28baf75848a9137316fd..97a8fc3ae80fb363c69c2e6b8c29b5373ac72aea:/youtube_dl/extractor/generic.py diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4aa2406..274f817 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals @@ -20,6 +20,7 @@ from ..utils import ( float_or_none, HEADRequest, is_html, + js_to_json, orderedSet, sanitized_Request, smuggle_url, @@ -27,9 +28,9 @@ from ..utils import ( unified_strdate, unsmuggle_url, UnsupportedError, - url_basename, xpath_text, ) +from .commonprotocols import RtmpIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, @@ -48,22 +49,42 @@ from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE +from .drtuber import DrTuberIE +from .redtube import RedTubeIE from .vimeo import VimeoIE -from .dailymotion import DailymotionCloudIE +from .dailymotion import ( + DailymotionIE, + DailymotionCloudIE, +) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE -from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE +from .webcaster import WebcasterFeedIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE +from .arkena import ArkenaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE from .vessel import VesselIE +from .kaltura import KalturaIE +from .eagleplatform import EaglePlatformIE +from .facebook import FacebookIE +from .soundcloud import SoundcloudIE +from .tunein import TuneInBaseIE +from .vbox7 import Vbox7IE +from .dbtv import DBTVIE +from .piksel import PikselIE +from .videa import VideaIE +from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE +from .openload import OpenloadIE +from .videopress import VideoPressIE +from .rutube import RutubeIE class GenericIE(InfoExtractor): @@ -94,7 +115,8 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' - ] + ], + 'skip': 'URL invalid', }, # Direct download with broken HEAD { @@ -224,7 +246,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tikibad ontruimd wegens brand', 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 33, }, 'params': { @@ -258,7 +280,8 @@ class GenericIE(InfoExtractor): 'params': { # m3u8 downloads 'skip_download': True, - } + }, + 'skip': 'video gone', }, # m3u8 served with Content-Type: text/plain { @@ -273,7 +296,8 @@ class GenericIE(InfoExtractor): 'params': { # m3u8 downloads 'skip_download': True, - } + }, + 'skip': 'video gone', }, # google redirect { @@ -283,7 +307,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 're:^Chris Ziegler takes a look at the\.*', + 'description': r're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, @@ -329,10 +353,10 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' in the - # http requests { + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { @@ -346,6 +370,24 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'add_id': ['BrightcoveLegacy'], + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + }, { # https://github.com/rg3/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', @@ -358,6 +400,7 @@ class GenericIE(InfoExtractor): 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', }, 'add_ie': ['BrightcoveLegacy'], + 'skip': 'video gone', }, { 'url': 'http://www.championat.com/video/football/v/87/87499.html', @@ -386,6 +429,43 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove with alternative playerID key + 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', + 'info_dict': { + 'id': 'nmeth.2062_SV1', + 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2228375078001', + 'ext': 'mp4', + 'title': 'nmeth.2062-sv1', + 'description': 'nmeth.2062-sv1', + 'timestamp': 1363357591, + 'upload_date': '20130315', + 'uploader': 'Nature Publishing Group', + 'uploader_id': '1964492299001', + }, + }], + }, + { + # Brightcove with UUID in videoPlayer + 'url': 'http://www8.hp.com/cn/zh/home.html', + 'info_dict': { + 'id': '5255815316001', + 'ext': 'mp4', + 'title': 'Sprocket Video - China', + 'description': 'Sprocket Video - China', + 'uploader': 'HP-Video Gallery', + 'timestamp': 1482263210, + 'upload_date': '20161220', + 'uploader_id': '1107601872001', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', @@ -411,6 +491,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'movie expired', }, # embed.ly video { @@ -438,6 +519,8 @@ class GenericIE(InfoExtractor): 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, + # HEAD requests lead to endless 301, while GET is OK + 'expected_warnings': ['301'], }, # RUTV embed { @@ -467,7 +550,7 @@ class GenericIE(InfoExtractor): 'url': 'http://www.vestifinance.ru/articles/25753', 'info_dict': { 'id': '25753', - 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', + 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"', }, 'playlist': [{ 'info_dict': { @@ -500,7 +583,7 @@ class GenericIE(InfoExtractor): 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e', 'ext': 'mp4', 'title': 'Ужастики, русский трейлер (2015)', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 153, } }, @@ -512,6 +595,9 @@ class GenericIE(InfoExtractor): 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', }, 'playlist_mincount': 7, + # This forum does not allow