X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/47d80ec0b18245caeb97018d4c1af18d0b5b972b..39393b81acfaf4045fb7f20454a0226f0dc9142e:/youtube_dl/extractor/generic.py diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3949c8b..355067a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -10,6 +10,7 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( compat_etree_fromstring, + compat_str, compat_urllib_parse_unquote, compat_urlparse, compat_xml_parse_error, @@ -20,6 +21,10 @@ from ..utils import ( float_or_none, HEADRequest, is_html, + js_to_json, + KNOWN_EXTENSIONS, + merge_dicts, + mimetype2ext, orderedSet, sanitized_Request, smuggle_url, @@ -29,15 +34,20 @@ from ..utils import ( UnsupportedError, xpath_text, ) +from .commonprotocols import RtmpIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .tvc import TVCIE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE @@ -49,11 +59,13 @@ from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE +from .tube8 import Tube8IE +from .mofosex import MofosexEmbedIE +from .spankwire import SpankwireIE +from .youporn import YouPornIE from .vimeo import VimeoIE -from .dailymotion import ( - DailymotionIE, - DailymotionCloudIE, -) +from .dailymotion import DailymotionIE +from .dailymail import DailyMailIE from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE from .mtv import MTVServicesEmbeddedIE @@ -68,13 +80,45 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE -from .vessel import VesselIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE -from .soundcloud import SoundcloudIE +from .soundcloud import SoundcloudEmbedIE +from .tunein import TuneInBaseIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE +from .piksel import PikselIE +from .videa import VideaIE +from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE +from .videopress import VideoPressIE +from .rutube import RutubeIE +from .limelight import LimelightBaseIE +from .anvato import AnvatoIE +from .washingtonpost import WashingtonPostIE +from .wistia import WistiaIE +from .mediaset import MediasetIE +from .joj import JojIE +from .megaphone import MegaphoneIE +from .vzaar import VzaarIE +from .channel9 import Channel9IE +from .vshare import VShareIE +from .mediasite import MediasiteIE +from .springboardplatform import SpringboardPlatformIE +from .yapfiles import YapFilesIE +from .vice import ViceIE +from .xfileshare import XFileShareIE +from .cloudflarestream import CloudflareStreamIE +from .peertube import PeerTubeIE +from .teachable import TeachableIE +from .indavideo import IndavideoEmbedIE +from .apa import APAIE +from .foxnews import FoxNewsIE +from .viqeo import ViqeoIE +from .expressen import ExpressenIE +from .zype import ZypeIE +from .odnoklassniki import OdnoklassnikiIE +from .kinja import KinjaEmbedIE class GenericIE(InfoExtractor): @@ -159,6 +203,16 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # RSS feed with enclosures and unsupported link URLs + { + 'url': 'http://www.hellointernet.fm/podcast?format=rss', + 'info_dict': { + 'id': 'http://www.hellointernet.fm/podcast?format=rss', + 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.', + 'title': 'Hello Internet', + }, + 'playlist_mincount': 100, + }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', @@ -236,7 +290,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tikibad ontruimd wegens brand', 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 33, }, 'params': { @@ -297,7 +351,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 're:^Chris Ziegler takes a look at the\.*', + 'description': r're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, @@ -343,10 +397,10 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' in the - # http requests { + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { @@ -361,7 +415,25 @@ class GenericIE(InfoExtractor): }, }, { - # https://github.com/rg3/youtube-dl/issues/2253 + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'add_id': ['BrightcoveLegacy'], + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + }, + { + # https://github.com/ytdl-org/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', 'md5': '0ba9446db037002366bab3b3eb30c88c', 'info_dict': { @@ -386,7 +458,7 @@ class GenericIE(InfoExtractor): }, }, { - # https://github.com/rg3/youtube-dl/issues/3541 + # https://github.com/ytdl-org/youtube-dl/issues/3541 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', 'info_dict': { @@ -401,6 +473,112 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove video in