-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
float_or_none,
HEADRequest,
is_html,
+ js_to_json,
orderedSet,
sanitized_Request,
smuggle_url,
unified_strdate,
unsmuggle_url,
UnsupportedError,
- url_basename,
xpath_text,
)
+from .commonprotocols import RtmpIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
from .vimeo import VimeoIE
from .dailymotion import (
DailymotionIE,
)
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
-from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
+from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
+from .twentymin import TwentyMinutenIE
+from .ustream import UstreamIE
+from .openload import OpenloadIE
+from .videopress import VideoPressIE
class GenericIE(InfoExtractor):
},
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
- ]
+ ],
+ 'skip': 'URL invalid',
},
# Direct download with broken HEAD
{
'ext': 'mp4',
'title': 'Tikibad ontruimd wegens brand',
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 33,
},
'params': {
'params': {
# m3u8 downloads
'skip_download': True,
- }
+ },
+ 'skip': 'video gone',
},
# m3u8 served with Content-Type: text/plain
{
'params': {
# m3u8 downloads
'skip_download': True,
- }
+ },
+ 'skip': 'video gone',
},
# google redirect
{
'ext': 'mp4',
'upload_date': '20130224',
'uploader_id': 'TheVerge',
- 'description': 're:^Chris Ziegler takes a look at the\.*',
+ 'description': r're:^Chris Ziegler takes a look at the\.*',
'uploader': 'The Verge',
'title': 'First Firefox OS phones side-by-side',
},
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
- # embedded brightcove video
- # it also tests brightcove videos that need to set the 'Referer' in the
- # http requests
{
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer'
+ # in the http requests
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
'skip_download': True,
},
},
+ {
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
{
# https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
},
'add_ie': ['BrightcoveLegacy'],
+ 'skip': 'video gone',
},
{
'url': 'http://www.championat.com/video/football/v/87/87499.html',
'skip_download': True, # m3u8 download
},
},
+ {
+ # Brightcove with alternative playerID key
+ 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
+ 'info_dict': {
+ 'id': 'nmeth.2062_SV1',
+ 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2228375078001',
+ 'ext': 'mp4',
+ 'title': 'nmeth.2062-sv1',
+ 'description': 'nmeth.2062-sv1',
+ 'timestamp': 1363357591,
+ 'upload_date': '20130315',
+ 'uploader': 'Nature Publishing Group',
+ 'uploader_id': '1964492299001',
+ },
+ }],
+ },
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
'params': {
'skip_download': True,
},
+ 'skip': 'movie expired',
},
# embed.ly video
{
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
},
+ # HEAD requests lead to endless 301, while GET is OK
+ 'expected_warnings': ['301'],
},
# RUTV embed
{
'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
'ext': 'mp4',
'title': 'Ужастики, русский трейлер (2015)',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 153,
}
},
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
},
'playlist_mincount': 7,
+ # This forum no longer allows <iframe> markup;
+ # HTML tags are now displayed as-is
+ 'skip': 'No videos on this page',
},
# Embedded TED video
{
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
}
},
- # Embedded Ustream video
- {
- 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
- 'md5': '27b99cdb639c9b12a79bca876a073417',
- 'info_dict': {
- 'id': '45734260',
- 'ext': 'flv',
- 'uploader': 'AU SPA: The NSA and Privacy',
- 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
- }
- },
# nowvideo embed hidden behind percent encoding
{
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
},
'params': {
'skip_download': 'Requires rtmpdump'
- }
+ },
+ 'skip': 'video gone',
},
# francetv embed
{
'duration': 48,
'timestamp': 1401537900,
'upload_date': '20140531',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
# Wistia embed
},
'playlist_mincount': 7,
},
+ # TuneIn station embed
+ {
+ 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+ 'info_dict': {
+ 'id': '204146',
+ 'ext': 'mp3',
+ 'title': 'CNRV',
+ 'location': 'Paris, France',
+ 'is_live': True,
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
# Livestream embed
{
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
},
},
+ # jwplayer rtmp
+ {
+ 'url': 'http://www.suffolk.edu/sjc/',
+ 'info_dict': {
+ 'id': 'sjclive',
+ 'ext': 'flv',
+ 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+ 'uploader': 'www.suffolk.edu',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # Complex jwplayer
+ {
+ 'url': 'http://www.indiedb.com/games/king-machine/videos',
+ 'info_dict': {
+ 'id': 'videos',
+ 'ext': 'mp4',
+ 'title': 'king machine trailer 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
},
},
- # Kaltura embed protected with referrer
- {
- 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
- 'info_dict': {
- 'id': '1_g4fbemnq',
- 'ext': 'mp4',
- 'title': 'Violetta - Achter De Schermen - Ruggero',
- 'description': 'Achter de schermen met Ruggero',
- 'timestamp': 1435133761,
- 'upload_date': '20150624',
- 'uploader_id': 'echojecka',
- },
- },
# Kaltura embed with single quotes
{
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
'skip_download': True,
}
},
+ {
+ # Kaltura embedded, some fileExt broken (#11480)
+ 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+ 'info_dict': {
+ 'id': '1_sgtvehim',
+ 'ext': 'mp4',
+ 'title': 'Our "Standard Models" of particle physics and cosmology',
+ 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+ 'timestamp': 1321158993,
+ 'upload_date': '20111113',
+ 'uploader_id': 'kps1',
+ },
+ 'add_ie': ['Kaltura'],
+ },
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
'ext': 'mp4',
'title': 'Навальный вышел на свободу',
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 87,
'view_count': int,
'age_limit': 0,
'id': '12820',
'ext': 'mp4',
'title': "'O Sole Mio",
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 216,
'view_count': int,
},
'ext': 'mp4',
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 694,
'age_limit': 0,
},
'id': '3519514',
'ext': 'mp4',
'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
- 'thumbnail': 're:^https?://.*\.png$',
+ 'thumbnail': r're:^https?://.*\.png$',
'duration': 45.115,
},
},
'id': '300346',
'ext': 'mp4',
'title': '中一中男師變性 全校師生力挺',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
'ext': 'mp4',
'title': 'Sauvons les abeilles ! - Le débat',
'description': 'md5:d9082128b1c5277987825d684939ca26',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'timestamp': 1434970506,
'upload_date': '20150622',
'uploader': 'Public Sénat',
'id': '2855',
'ext': 'mp4',
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'uploader': 'ClickHole',
'uploader_id': 'clickhole',
}
'duration': 248.667,
},
},
- # ScreenwaveMedia embed
- {
- 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
- 'md5': '24ace5baba0d35d55c6810b51f34e9e0',
- 'info_dict': {
- 'id': 'cinemasnob-55d26273809dd',
- 'ext': 'mp4',
- 'title': 'cinemasnob',
- },
- },
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
'duration': 51690,
},
},
- # JWPlayer with M3U8
- {
- 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
- 'info_dict': {
- 'id': 'playlist',
- 'ext': 'mp4',
- 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
- 'uploader': 'ren.tv',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- }
- },
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
# This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
{
},
'add_ie': ['Vimeo'],
},
+ {
+ # generic vimeo embed that requires original URL passed as Referer
+ 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+ 'only_matching': True,
+ },
{
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
},
'add_ie': [Vbox7IE.ie_key()],
},
+ {
+ # DBTV embeds
+ 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+ 'info_dict': {
+ 'id': '43254897',
+ 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ # Videa embeds
+ 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+ 'info_dict': {
+ 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+ 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+ },
+ 'playlist_mincount': 2,
+ },
+ {
+ # 20 minuten embed
+ 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [TwentyMinutenIE.ie_key()],
+ },
+ {
+ # VideoPress embed
+ 'url': 'https://en.support.wordpress.com/videopress/',
+ 'info_dict': {
+ 'id': 'OcobLTqC',
+ 'ext': 'm4v',
+ 'title': 'IMG_5786',
+ 'timestamp': 1435711927,
+ 'upload_date': '20150701',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [VideoPressIE.ie_key()],
+ },
+ {
+ # ThePlatform embedded with whitespace in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
- video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ video_id = self._generic_id(url)
self.to_screen('%s: Requesting header' % video_id)
info_dict = {
'id': video_id,
- 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+ 'title': self._generic_title(url),
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
}
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'] = self._parse_ism_formats(doc, url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
- doc, video_id, mpd_base_url=url.rpartition('/')[0])
+ doc, video_id,
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
+ mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
- vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
- if vimeo_url is not None:
- return self.url_result(vimeo_url)
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ return OoyalaIE._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
- # Look for embedded PornHub player
- pornhub_url = PornHubIE._extract_url(webpage)
- if pornhub_url:
- return self.url_result(pornhub_url, 'PornHub')
-
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+
+ # Look for embedded DrTuber player
+ drtuber_urls = DrTuberIE._extract_urls(webpage)
+ if drtuber_urls:
+ return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+
+ # Look for embedded RedTube player
+ redtube_urls = RedTubeIE._extract_urls(webpage)
+ if redtube_urls:
+ return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
return self.url_result(mobj.group('url'), 'TED')
# Look for embedded Ustream videos
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Ustream')
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
# Look for embedded arte.tv player
mobj = re.search(
if soundcloud_urls:
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+ # Look for tunein player
+ tunein_urls = TuneInBaseIE._extract_urls(webpage)
+ if tunein_urls:
+ return _playlist_from_matches(tunein_urls)
+
# Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
if mtvservices_url:
if videomore_url:
return self.url_result(videomore_url)
+ # Look for Webcaster embeds
+ webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+ if webcaster_url:
+ return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
# Look for Playwire embeds
mobj = re.search(
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
- # Look for ScreenwaveMedia embeds
- mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
- if mobj is not None:
- return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
-
# Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage)
if digiteka_url:
if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key())
+ # Look for Piksel embeds
+ piksel_url = PikselIE._extract_url(webpage)
+ if piksel_url:
+ return self.url_result(piksel_url, PikselIE.ie_key())
+
# Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:
'Channel': 'channel',
'ChannelList': 'channel_list',
}
- return self.url_result('limelight:%s:%s' % (
- lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
+ return self.url_result(smuggle_url('limelight:%s:%s' % (
+ lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
+ 'Limelight%s' % mobj.group(1), mobj.group(2))
+
+ mobj = re.search(
+ r'''(?sx)
+ <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
+ <param[^>]+
+ name=(["\'])flashVars\2[^>]+
+ value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
+ ''', webpage)
+ if mobj:
+ return self.url_result(smuggle_url(
+ 'limelight:media:%s' % mobj.group('id'),
+ {'source_url': url}), 'LimelightMedia', mobj.group('id'))
# Look for AdobeTVVideo embeds
mobj = re.search(
# Look for VODPlatform embeds
mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
webpage)
if mobj is not None:
return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
+ self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+ # Look for Mangomolo embeds
+ mobj = re.search(
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+ (?:
+ video\?.*?\bid=(?P<video_id>\d+)|
+ index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ ).+?)\1''', webpage)
+ if mobj is not None:
+ info = {
+ '_type': 'url_transparent',
+ 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+ video_id = mobj.group('video_id')
+ if video_id:
+ info.update({
+ 'ie_key': 'MangomoloVideo',
+ 'id': video_id,
+ })
+ else:
+ info.update({
+ 'ie_key': 'MangomoloLive',
+ 'id': mobj.group('channel_id'),
+ })
+ return info
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if vbox7_url:
return self.url_result(vbox7_url, Vbox7IE.ie_key())
+ # Look for DBTV embeds
+ dbtv_urls = DBTVIE._extract_urls(webpage)
+ if dbtv_urls:
+ return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+
+ # Look for Videa embeds
+ videa_urls = VideaIE._extract_urls(webpage)
+ if videa_urls:
+ return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+
+ # Look for 20 minuten embeds
+ twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
+ if twentymin_urls:
+ return _playlist_from_matches(
+ twentymin_urls, ie=TwentyMinutenIE.ie_key())
+
+ # Look for Openload embeds
+ openload_urls = OpenloadIE._extract_urls(webpage)
+ if openload_urls:
+ return _playlist_from_matches(
+ openload_urls, ie=OpenloadIE.ie_key())
+
+ # Look for VideoPress embeds
+ videopress_urls = VideoPressIE._extract_urls(webpage)
+ if videopress_urls:
+ return _playlist_from_matches(
+ videopress_urls, ie=VideoPressIE.ie_key())
+
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
info_dict.update(json_ld)
return info_dict
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ for entry in entries:
+ entry.update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries)
+
+ jwplayer_data_str = self._find_jwplayer_data(webpage)
+ if jwplayer_data_str:
+ try:
+ jwplayer_data = self._parse_json(
+ jwplayer_data_str, video_id, transform_source=js_to_json)
+ return self._parse_jwplayer_data(jwplayer_data, video_id)
+ except ExtractorError:
+ pass
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
+ if RtmpIE.suitable(vurl):
+ return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
def filter_video(urls):
return list(filter(check_video, urls))
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
- if not found:
- # HTML5 video
- found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
'age_limit': age_limit,
}
+ if RtmpIE.suitable(video_url):
+ entry_info_dict.update({
+ '_type': 'url_transparent',
+ 'ie_key': RtmpIE.ie_key(),
+ 'url': video_url,
+ })
+ entries.append(entry_info_dict)
+ continue
+
ext = determine_ext(video_url)
if ext == 'smil':
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to generic extractor in order to look into the contents of
+ # the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url