+version 2019.09.28
+
+Core
+* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
+
+Extractors
+* [vk] Fix extraction (#22522)
+* [heise] Fix kaltura embeds extraction (#22514)
+* [ted] Check for resources validity and extract subtitled downloads (#22513)
++ [youtube] Add support for
+ owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
++ [nhk] Add support for clips
+* [nhk] Fix video extraction (#22249, #22353)
+* [byutv] Fix extraction (#22070)
++ [openload] Add support for oload.online (#22304)
++ [youtube] Add support for invidious.drycat.fr (#22451)
+* [jwplatform] Do not match video URLs (#20596, #22148)
+* [youtube:playlist] Unescape playlist uploader (#22483)
++ [bilibili] Add support for audio albums and songs (#21094)
++ [instagram] Add support for tv URLs
++ [mixcloud] Allow uppercase letters in format URLs (#19280)
+* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
+ #12842, #13912, #15669, #16303)
+* [hotstar] Use native HLS downloader by default
++ [hotstar] Extract more formats (#22323)
+* [9now] Fix extraction (#22361)
+* [zdf] Bypass geo restriction
++ [tv4] Extract series metadata
+* [tv4] Fix extraction (#22443)
+
+
+version 2019.09.12.1
+
+Extractors
+* [youtube] Remove quality and tbr for itag 43 (#22372)
+
+
+version 2019.09.12
+
+Extractors
+* [youtube] Quick extraction tempfix (#22367, #22163)
+
+
version 2019.09.01
Core
- **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
+ - **BilibiliAudio**
+ - **BilibiliAudioAlbum**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(ie_result))
+ self.__forced_printings(
+ ie_result, self.prepare_filename(ie_result),
+ incomplete=True)
return ie_result
if result_type == 'video':
subs[lang] = f
return subs
+ def __forced_printings(self, info_dict, filename, incomplete):
+ def print_mandatory(field):
+ if (self.params.get('force%s' % field, False)
+ and (not incomplete or info_dict.get(field) is not None)):
+ self.to_stdout(info_dict[field])
+
+ def print_optional(field):
+ if (self.params.get('force%s' % field, False)
+ and info_dict.get(field) is not None):
+ self.to_stdout(info_dict[field])
+
+ print_mandatory('title')
+ print_mandatory('id')
+ if self.params.get('forceurl', False) and not incomplete:
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_optional('thumbnail')
+ print_optional('description')
+ if self.params.get('forcefilename', False) and filename is not None:
+ self.to_stdout(filename)
+ if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_dict['duration']))
+ print_mandatory('format')
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(info_dict))
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()
+ # TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
- if len(info_dict['title']) > 200:
- info_dict['title'] = info_dict['title'][:197] + '...'
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings
- if self.params.get('forcetitle', False):
- self.to_stdout(info_dict['fulltitle'])
- if self.params.get('forceid', False):
- self.to_stdout(info_dict['id'])
- if self.params.get('forceurl', False):
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
- if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
- self.to_stdout(info_dict['thumbnail'])
- if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
- self.to_stdout(info_dict['description'])
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
- if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
- self.to_stdout(formatSeconds(info_dict['duration']))
- if self.params.get('forceformat', False):
- self.to_stdout(info_dict['format'])
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
+ self.__forced_printings(info_dict, filename, incomplete=False)
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
float_or_none,
parse_iso8601,
smuggle_url,
+ str_or_none,
strip_jsonp,
unified_timestamp,
unsmuggle_url,
return self.playlist_result(
entries, bangumi_id,
season_info.get('bangumi_title'), season_info.get('evaluate'))
+
+
+class BilibiliAudioBaseIE(InfoExtractor):
+ def _call_api(self, path, sid, query=None):
+ if not query:
+ query = {'sid': sid}
+ return self._download_json(
+ 'https://www.bilibili.com/audio/music-service-c/web/' + path,
+ sid, query=query)['data']
+
+
+class BilibiliAudioIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/au1003142',
+ 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
+ 'info_dict': {
+ 'id': '1003142',
+ 'ext': 'm4a',
+ 'title': '【tsukimi】YELLOW / 神山羊',
+ 'artist': 'tsukimi',
+ 'comment_count': int,
+ 'description': 'YELLOW的mp3版!',
+ 'duration': 183,
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1564836614,
+ 'upload_date': '20190803',
+ 'uploader': 'tsukimi-つきみぐー',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ au_id = self._match_id(url)
+
+ play_data = self._call_api('url', au_id)
+ formats = [{
+ 'url': play_data['cdns'][0],
+ 'filesize': int_or_none(play_data.get('size')),
+ }]
+
+ song = self._call_api('song/info', au_id)
+ title = song['title']
+ statistic = song.get('statistic') or {}
+
+ subtitles = None
+ lyric = song.get('lyric')
+ if lyric:
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }]
+ }
+
+ return {
+ 'id': au_id,
+ 'title': title,
+ 'formats': formats,
+ 'artist': song.get('author'),
+ 'comment_count': int_or_none(statistic.get('comment')),
+ 'description': song.get('intro'),
+ 'duration': int_or_none(song.get('duration')),
+ 'subtitles': subtitles,
+ 'thumbnail': song.get('cover'),
+ 'timestamp': int_or_none(song.get('passtime')),
+ 'uploader': song.get('uname'),
+ 'view_count': int_or_none(statistic.get('play')),
+ }
+
+
+class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/am10624',
+ 'info_dict': {
+ 'id': '10624',
+ 'title': '每日新曲推荐(每日11:00更新)',
+ 'description': '每天11:00更新,为你推送最新音乐',
+ },
+ 'playlist_count': 19,
+ }
+
+ def _real_extract(self, url):
+ am_id = self._match_id(url)
+
+ songs = self._call_api(
+ 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
+
+ entries = []
+ for song in songs:
+ sid = str_or_none(song.get('id'))
+ if not sid:
+ continue
+ entries.append(self.url_result(
+ 'https://www.bilibili.com/audio/au' + sid,
+ BilibiliAudioIE.ie_key(), sid))
+
+ if entries:
+ album_data = self._call_api('menu/info', am_id) or {}
+ album_title = album_data.get('title')
+ if album_title:
+ for entry in entries:
+ entry['album'] = album_title
+ return self.playlist_result(
+ entries, am_id, album_title, album_data.get('intro'))
+
+ return self.playlist_result(entries, am_id)
from __future__ import unicode_literals
import base64
-import json
import re
import struct
from ..compat import (
compat_etree_fromstring,
compat_parse_qs,
- compat_str,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_xml_parse_error,
compat_HTTPError,
)
from ..utils import (
- determine_ext,
ExtractorError,
extract_attributes,
find_xpath_attr,
js_to_json,
int_or_none,
parse_iso8601,
+ smuggle_url,
unescapeHTML,
unsmuggle_url,
update_url_query,
clean_html,
mimetype2ext,
+ UnsupportedError,
)
class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
- _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
_TESTS = [
{
'timestamp': 1368213670,
'upload_date': '20130510',
'uploader_id': '1589608506001',
- }
+ },
+ 'skip': 'The player has been deactivated by the content owner',
},
{
# From http://medianetwork.oracle.com/video/player/1785452137001
'upload_date': '20120814',
'uploader_id': '1460825906',
},
+ 'skip': 'video not playable',
},
{
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
'ext': 'mp4',
'title': 'This Bracelet Acts as a Personal Thermostat',
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
- 'uploader': 'Mashable',
+ # 'uploader': 'Mashable',
'timestamp': 1382041798,
'upload_date': '20131017',
'uploader_id': '1130468786001',
'id': '3550319591001',
},
'playlist_mincount': 7,
+ 'skip': 'Unsupported URL',
},
{
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
'title': 'Lesson 08',
},
'playlist_mincount': 10,
+ 'skip': 'Unsupported URL',
},
{
# playerID inferred from bcpid
'only_matching': True, # Tested in GenericIE
}
]
- FLV_VCODECS = {
- 1: 'SORENSON',
- 2: 'ON2',
- 3: 'H264',
- 4: 'VP8',
- }
@classmethod
def _build_brighcove_url(cls, object_str):
@classmethod
def _make_brightcove_url(cls, params):
- return update_url_query(cls._FEDERATED_URL, params)
+ return update_url_query(
+ 'http://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod
def _extract_brightcove_url(cls, webpage):
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
# We set the original url as the default 'Referer' header
- referer = smuggled_data.get('Referer', url)
+ referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
+ video_id = videoPlayer[0]
if 'playerID' not in query:
mobj = re.search(r'/bcpid(\d+)', url)
if mobj is not None:
query['playerID'] = [mobj.group(1)]
- return self._get_video_info(
- videoPlayer[0], query, referer=referer)
- elif 'playerKey' in query:
- player_key = query['playerKey']
- return self._get_playlist_info(player_key[0])
- else:
- raise ExtractorError(
- 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
- expected=True)
-
- def _brightcove_new_url_result(self, publisher_id, video_id):
- brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
- return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
-
- def _get_video_info(self, video_id, query, referer=None):
- headers = {}
- linkBase = query.get('linkBaseURL')
- if linkBase is not None:
- referer = linkBase[0]
- if referer is not None:
- headers['Referer'] = referer
- webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
-
- error_msg = self._html_search_regex(
- r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
- 'error message', default=None)
- if error_msg is not None:
publisher_id = query.get('publisherId')
if publisher_id and publisher_id[0].isdigit():
publisher_id = publisher_id[0]
else:
player_id = query.get('playerID')
if player_id and player_id[0].isdigit():
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False)
if player_key:
enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
- if publisher_id:
- return self._brightcove_new_url_result(publisher_id, video_id)
- raise ExtractorError(
- 'brightcove said: %s' % error_msg, expected=True)
-
- self.report_extraction(video_id)
- info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
- info = json.loads(info)['data']
- video_info = info['programmedContent']['videoPlayer']['mediaDTO']
- video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
-
- return self._extract_video_info(video_info)
-
- def _get_playlist_info(self, player_key):
- info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
- playlist_info = self._download_webpage(
- info_url, player_key, 'Downloading playlist information')
-
- json_data = json.loads(playlist_info)
- if 'videoList' in json_data:
- playlist_info = json_data['videoList']
- playlist_dto = playlist_info['mediaCollectionDTO']
- elif 'playlistTabs' in json_data:
- playlist_info = json_data['playlistTabs']
- playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
- else:
- raise ExtractorError('Empty playlist')
-
- videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
-
- return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
- playlist_title=playlist_dto['displayName'])
-
- def _extract_video_info(self, video_info):
- video_id = compat_str(video_info['id'])
- publisher_id = video_info.get('publisherId')
- info = {
- 'id': video_id,
- 'title': video_info['displayName'].strip(),
- 'description': video_info.get('shortDescription'),
- 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
- 'uploader': video_info.get('publisherName'),
- 'uploader_id': compat_str(publisher_id) if publisher_id else None,
- 'duration': float_or_none(video_info.get('length'), 1000),
- 'timestamp': int_or_none(video_info.get('creationDate'), 1000),
- }
-
- renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
- if renditions:
- formats = []
- for rend in renditions:
- url = rend['defaultURL']
- if not url:
- continue
- ext = None
- if rend['remote']:
- url_comp = compat_urllib_parse_urlparse(url)
- if url_comp.path.endswith('.m3u8'):
- formats.extend(
- self._extract_m3u8_formats(
- url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- continue
- elif 'akamaihd.net' in url_comp.netloc:
- # This type of renditions are served through
- # akamaihd.net, but they don't use f4m manifests
- url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
- ext = 'flv'
- if ext is None:
- ext = determine_ext(url)
- tbr = int_or_none(rend.get('encodingRate'), 1000)
- a_format = {
- 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
- 'url': url,
- 'ext': ext,
- 'filesize': int_or_none(rend.get('size')) or None,
- 'tbr': tbr,
- }
- if rend.get('audioOnly'):
- a_format.update({
- 'vcodec': 'none',
- })
- else:
- a_format.update({
- 'height': int_or_none(rend.get('frameHeight')),
- 'width': int_or_none(rend.get('frameWidth')),
- 'vcodec': rend.get('videoCodec'),
- })
-
- # m3u8 manifests with remote == false are media playlists
- # Not calling _extract_m3u8_formats here to save network traffic
- if ext == 'm3u8':
- a_format.update({
- 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
- 'ext': 'mp4',
- 'protocol': 'm3u8_native',
- })
-
- formats.append(a_format)
- self._sort_formats(formats)
- info['formats'] = formats
- elif video_info.get('FLVFullLengthURL') is not None:
- info.update({
- 'url': video_info['FLVFullLengthURL'],
- 'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
- 'filesize': int_or_none(video_info.get('FLVFullSize')),
- })
-
- if self._downloader.params.get('include_ads', False):
- adServerURL = video_info.get('_youtubedl_adServerURL')
- if adServerURL:
- ad_info = {
- '_type': 'url',
- 'url': adServerURL,
- }
- if 'url' in info:
- return {
- '_type': 'playlist',
- 'title': info['title'],
- 'entries': [ad_info, info],
- }
- else:
- return ad_info
-
- if not info.get('url') and not info.get('formats'):
- uploader_id = info.get('uploader_id')
- if uploader_id:
- info.update(self._brightcove_new_url_result(uploader_id, video_id))
- else:
- raise ExtractorError('Unable to extract video url for %s' % video_id)
- return info
+ if publisher_id:
+ brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+ if referer:
+ brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
+ return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+ # TODO: figure out if it's possible to extract playlistId from playerKey
+ # elif 'playerKey' in query:
+ # player_key = query['playerKey']
+ # return self._get_playlist_info(player_key[0])
+ raise UnsupportedError(url)
class BrightcoveNewIE(AdobePassIE):
import re
from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+ determine_ext,
+ merge_dicts,
+ parse_duration,
+ url_or_none,
+)
class BYUtvIE(InfoExtractor):
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
- info = self._download_json(
+ video = self._download_json(
'https://api.byutv.org/api3/catalog/getvideosforcontent',
display_id, query={
'contentid': video_id,
'x-byutv-platformkey': 'xsaaw9c7y5',
})
- ep = info.get('ooyalaVOD')
+ ep = video.get('ooyalaVOD')
if ep:
return {
'_type': 'url_transparent',
'thumbnail': ep.get('imageThumbnail'),
}
- ep = info['dvr']
- title = ep['title']
- formats = self._extract_m3u8_formats(
- ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
+ info = {}
+ formats = []
+ for format_id, ep in video.items():
+ if not isinstance(ep, dict):
+ continue
+ video_url = url_or_none(ep.get('videoUrl'))
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ merge_dicts(info, {
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ 'duration': parse_duration(ep.get('length')),
+ })
self._sort_formats(formats)
- return {
+
+ return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
- 'title': title,
- 'description': ep.get('description'),
- 'thumbnail': ep.get('imageThumbnail'),
- 'duration': parse_duration(ep.get('length')),
+ 'title': display_id,
'formats': formats,
- }
+ })
from .bilibili import (
BiliBiliIE,
BiliBiliBangumiIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
webpage, default=None) or self._html_search_meta(
'description', webpage)
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
+ def _make_kaltura_result(kaltura_url):
return {
'_type': 'url_transparent',
'url': smuggle_url(kaltura_url, {'source_url': url}),
'description': description,
}
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return _make_kaltura_result(kaltura_url)
+
+ kaltura_id = self._search_regex(
+ r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
+ default=None, group='id')
+ if kaltura_id:
+ return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
+
yt_urls = YoutubeIE._extract_urls(webpage)
if yt_urls:
return self.playlist_from_matches(
import hashlib
import hmac
+import re
import time
import uuid
format_url = url_or_none(playback_set.get('playbackUrl'))
if not format_url:
continue
+ format_url = re.sub(
+ r'(?<=//staragvod)(\d)', r'web\1', format_url)
tags = str_or_none(playback_set.get('tagsCombination')) or ''
if tags and 'encryption:plain' not in tags:
continue
try:
if 'package:hls' in tags or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id='hls'))
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls'))
elif 'package:dash' in tags or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash'))
class InstagramIE(InfoExtractor):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
}, {
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/tv/aye83DjauH/',
+ 'only_matching': True,
}]
@staticmethod
class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
def decrypt_url(f_url):
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
decrypted_url = self._decrypt_xor_cipher(k, f_url)
- if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
+ if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
return decrypted_url
for url_key in ('url', 'hlsUrl', 'dashUrl'):
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
+ # clip
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+ 'md5': '256a1be14f48d960a7e61e2532d95ec3',
+ 'info_dict': {
+ 'id': 'a95j5iza',
+ 'ext': 'mp4',
+ 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+ 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+ 'timestamp': 1565965194,
+ 'upload_date': '20190816',
+ },
+ }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
'only_matching': True,
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
'only_matching': True,
}]
- _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
+ _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
def _real_extract(self, url):
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
is_video = m_type == 'video'
episode = self._download_json(
- self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
+ self._API_URL_TEMPLATE % (
+ 'v' if is_video else 'r',
+ 'clip' if episode_id[:4] == '9999' else 'esd',
+ episode_id, lang, '/all' if is_video else ''),
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
title = episode.get('sub_title_clean') or episode['sub_title']
if is_video:
info.update({
'_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:' + episode['vod_id'],
+ 'ie_key': 'Piksel',
+ 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
})
else:
audio = episode['audio']
https?://
(?:
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
- (?:www\.)?nickjr\.[a-z]{2}
+ (?:www\.)?nickjr\.[a-z]{2}|
+ (?:www\.)?nickelodeonjunior\.fr
)
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
'''
}, {
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)
page_data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.*?});', webpage,
- 'page data'), display_id)
+ 'page data', default='{}'), display_id, fatal=False)
+ if not page_data:
+ page_data = self._parse_json(self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
+ webpage, 'page data'), display_id), display_id)
for kind in ('episode', 'clip'):
current_key = page_data.get(kind, {}).get(
_DOMAINS = r'''
(?:
openload\.(?:co|io|link|pw)|
- oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)|
+ oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|press|pw|life|live|space|services|website|vip)|
oladblock\.(?:services|xyz|me)|openloed\.co
)
'''
}, {
'url': 'https://oload.services/embed/bs1NWj1dCag/',
'only_matching': True,
+ }, {
+ 'url': 'https://oload.online/f/W8o2UfN1vNY/',
+ 'only_matching': True,
}, {
'url': 'https://oload.press/embed/drTBl1aOTvk/',
'only_matching': True,
class PikselIE(InfoExtractor):
- _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
_TESTS = [
{
'url': 'http://player.piksel.com/v/ums2867l',
'timestamp': 1486171129,
'upload_date': '20170204'
}
+ },
+ {
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+ 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+ 'only_matching': True,
}
]
return mobj.group('url')
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'data-de-program-uuid=[\'"]([a-z0-9]+)',
+ webpage, 'program uuid', default=display_id)
app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"',
r'data-de-api-key\s*=\s*"([^"]+)"'
)
-class PlatziIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- platzi\.com/clases| # es version
- courses\.platzi\.com/classes # en version
- )/[^/]+/(?P<id>\d+)-[^/?\#&]+
- '''
+class PlatziBaseIE(InfoExtractor):
_LOGIN_URL = 'https://platzi.com/login/'
_NETRC_MACHINE = 'platzi'
- _TESTS = [{
- 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
- 'md5': '8f56448241005b561c10f11a595b37e3',
- 'info_dict': {
- 'id': '12074',
- 'ext': 'mp4',
- 'title': 'Creando nuestra primera página',
- 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
- 'duration': 420,
- },
- 'skip': 'Requires platzi account credentials',
- }, {
- 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
- 'info_dict': {
- 'id': '13430',
- 'ext': 'mp4',
- 'title': 'Background',
- 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
- 'duration': 360,
- },
- 'skip': 'Requires platzi account credentials',
- 'params': {
- 'skip_download': True,
- },
- }]
-
def _real_initialize(self):
self._login()
'Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
+
+class PlatziIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+ '''
+
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+ 'md5': '8f56448241005b561c10f11a595b37e3',
+ 'info_dict': {
+ 'id': '12074',
+ 'ext': 'mp4',
+ 'title': 'Creando nuestra primera página',
+ 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+ 'duration': 420,
+ },
+ 'skip': 'Requires platzi account credentials',
+ }, {
+ 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+ 'info_dict': {
+ 'id': '13430',
+ 'ext': 'mp4',
+ 'title': 'Background',
+ 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+ 'duration': 360,
+ },
+ 'skip': 'Requires platzi account credentials',
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
def _real_extract(self, url):
lecture_id = self._match_id(url)
data = self._parse_json(
self._search_regex(
- r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
+            # client_data may itself contain "};", so try the stricter
+            # regex first before falling back to the greedy one
+ (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+ r'client_data\s*=\s*({.+?})\s*;'),
+ webpage, 'client data'),
lecture_id)
material = data['initialState']['material']
}
-class PlatziCourseIE(InfoExtractor):
+class PlatziCourseIE(PlatziBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
title = talk_info['title'].strip()
- native_downloads = try_get(
- talk_info,
- (lambda x: x['downloads']['nativeDownloads'],
- lambda x: x['nativeDownloads']),
- dict) or {}
+ downloads = talk_info.get('downloads') or {}
+ native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
formats = [{
'url': format_url,
'format_id': format_id,
- 'format': format_id,
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
+
+ subtitled_downloads = downloads.get('subtitledDownloads') or {}
+ for lang, subtitled_download in subtitled_downloads.items():
+ for q in self._NATIVE_FORMATS:
+ q_url = subtitled_download.get(q)
+ if not q_url:
+ continue
+ formats.append({
+ 'url': q_url,
+ 'format_id': '%s-%s' % (q, lang),
+ 'language': lang,
+ })
+
if formats:
for f in formats:
- finfo = self._NATIVE_FORMATS.get(f['format_id'])
+ finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
if finfo:
f.update(finfo)
http_url = None
for format_id, resources in resources_.items():
- if format_id == 'h264':
- for resource in resources:
- h264_url = resource.get('file')
- if not h264_url:
- continue
- bitrate = int_or_none(resource.get('bitrate'))
- formats.append({
- 'url': h264_url,
- 'format_id': '%s-%sk' % (format_id, bitrate),
- 'tbr': bitrate,
- })
- if re.search(r'\d+k', h264_url):
- http_url = h264_url
- elif format_id == 'rtmp':
- streamer = talk_info.get('streamer')
- if not streamer:
- continue
- for resource in resources:
- formats.append({
- 'format_id': '%s-%s' % (format_id, resource.get('name')),
- 'url': streamer,
- 'play_path': resource['file'],
- 'ext': 'flv',
- 'width': int_or_none(resource.get('width')),
- 'height': int_or_none(resource.get('height')),
- 'tbr': int_or_none(resource.get('bitrate')),
- })
- elif format_id == 'hls':
+ if format_id == 'hls':
if not isinstance(resources, dict):
continue
stream_url = url_or_none(resources.get('stream'))
formats.extend(self._extract_m3u8_formats(
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
+ else:
+ if not isinstance(resources, list):
+ continue
+ if format_id == 'h264':
+ for resource in resources:
+ h264_url = resource.get('file')
+ if not h264_url:
+ continue
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': h264_url,
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ if re.search(r'\d+k', h264_url):
+ http_url = h264_url
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ for resource in resources:
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ })
m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
video_id = self._match_id(url)
info = self._download_json(
- 'http://www.tv4play.se/player/assets/%s.json' % video_id,
- video_id, 'Downloading video info JSON')
+ 'https://playback-api.b17g.net/asset/%s' % video_id,
+ video_id, 'Downloading video info JSON', query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls,dash',
+ 'drm': 'widevine',
+ })['metadata']
title = info['title']
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
- 'is_live': info.get('is_live') is True,
+ 'is_live': info.get('isLive') is True,
+ 'series': info.get('seriesTitle'),
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode': info.get('episodeTitle'),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
}
data = self._parse_json(
self._search_regex(
r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
- 'player params'),
- video_id)['params'][0]
+ 'player params', default='{}'),
+ video_id)
+ if data:
+ data = data['params'][0]
+
+ # <!--{...}
+ if not data:
+ data = self._parse_json(
+ self._search_regex(
+ r'<!--\s*({.+})', info_page, 'payload'),
+ video_id)['payload'][-1][-1]['player']['params'][0]
title = unescapeHTML(data['md_title'])
orderedSet,
parse_codecs,
parse_duration,
- qualities,
remove_quotes,
remove_start,
smuggle_url,
(?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.mastodon\.host/|
(?:www\.)?invidious\.nixnet\.xyz/|
+ (?:www\.)?invidious\.drycat\.fr/|
(?:www\.)?tube\.poal\.co/|
(?:www\.)?vid\.wxzm\.sx/|
(?:www\.)?yt\.elukerio\.org/|
+ (?:www\.)?kgg2m7yk5aybusll\.onion/|
+ (?:www\.)?qklhadlycap4cnod\.onion/|
+ (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
+ (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
+ (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
+ (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
+ (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
return int_or_none(self._search_regex(
r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
+ streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
+ streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
+
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
'url': video_info['conn'][0],
'player_url': player_url,
}]
- elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+ elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
+ formats = []
formats_spec = {}
fmt_list = video_info.get('fmt_list', [''])[0]
if fmt_list:
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
}
- q = qualities(['small', 'medium', 'hd720'])
- streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
- if streaming_formats:
- for fmt in streaming_formats:
- itag = str_or_none(fmt.get('itag'))
- if not itag:
- continue
- quality = fmt.get('quality')
- quality_label = fmt.get('qualityLabel') or quality
- formats_spec[itag] = {
- 'asr': int_or_none(fmt.get('audioSampleRate')),
- 'filesize': int_or_none(fmt.get('contentLength')),
- 'format_note': quality_label,
- 'fps': int_or_none(fmt.get('fps')),
- 'height': int_or_none(fmt.get('height')),
- 'quality': q(quality),
- # bitrate for itag 43 is always 2147483647
- 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
- 'width': int_or_none(fmt.get('width')),
- }
- formats = []
- for url_data_str in encoded_url_map.split(','):
- url_data = compat_parse_qs(url_data_str)
- if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
+ for fmt in streaming_formats:
+ itag = str_or_none(fmt.get('itag'))
+ if not itag:
continue
+ quality = fmt.get('quality')
+ quality_label = fmt.get('qualityLabel') or quality
+ formats_spec[itag] = {
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt.get('contentLength')),
+ 'format_note': quality_label,
+ 'fps': int_or_none(fmt.get('fps')),
+ 'height': int_or_none(fmt.get('height')),
+ # bitrate for itag 43 is always 2147483647
+ 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+ 'width': int_or_none(fmt.get('width')),
+ }
+
+ for fmt in streaming_formats:
+ if fmt.get('drm_families'):
+ continue
+ url = url_or_none(fmt.get('url'))
+
+ if not url:
+ cipher = fmt.get('cipher')
+ if not cipher:
+ continue
+ url_data = compat_parse_qs(cipher)
+ url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
+ if not url:
+ continue
+ else:
+ cipher = None
+ url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
# Unsupported FORMAT_STREAM_TYPE_OTF
if stream_type == 3:
continue
- format_id = url_data['itag'][0]
- url = url_data['url'][0]
-
- if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')
-
- player_url = json.loads(jsplayer_url_json)
- if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, 'age gate player URL')
- player_url = json.loads(player_url_json)
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
+ format_id = fmt.get('itag') or url_data['itag'][0]
+ if not format_id:
+ continue
+ format_id = compat_str(format_id)
- if self._downloader.params.get('verbose'):
+ if cipher:
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+ ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE,
+ embed_webpage if age_gate else video_webpage,
+ 'JS player URL (1)', default=None)
+ if not jsplayer_url_json and not age_gate:
+ # We need the embed website after all
+ if embed_webpage is None:
+ embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE, embed_webpage, 'JS player URL')
+
+ player_url = json.loads(jsplayer_url_json)
if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
- else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- 'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, 'age gate player URL')
+ player_url = json.loads(player_url_json)
+
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
+ if self._downloader.params.get('verbose'):
+ if player_url is None:
+ player_version = 'unknown'
+ player_desc = 'unknown'
else:
- player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
- player_url,
- 'html5 player', fatal=False)
- player_desc = 'html5 player %s' % player_version
-
- parts_sizes = self._signature_cache_id(encrypted_sig)
- self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
- url += '&%s=%s' % (sp, signature)
+ if player_url.endswith('swf'):
+ player_version = self._search_regex(
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+ 'flash player', fatal=False)
+ player_desc = 'flash player %s' % player_version
+ else:
+ player_version = self._search_regex(
+ [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+ r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+ player_url,
+ 'html5 player', fatal=False)
+ player_desc = 'html5 player %s' % player_version
+
+ parts_sizes = self._signature_cache_id(encrypted_sig)
+ self.to_screen('{%s} signature length %s, %s' %
+ (format_id, parts_sizes, player_desc))
+
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
+ sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+ url += '&%s=%s' % (sp, signature)
if 'ratebypass' not in url:
url += '&ratebypass=yes'
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+ if width is None:
+ width = int_or_none(fmt.get('width'))
+ if height is None:
+ height = int_or_none(fmt.get('height'))
+
filesize = int_or_none(url_data.get(
'clen', [None])[0]) or _extract_filesize(url)
- quality = url_data.get('quality', [None])[0]
+ quality = url_data.get('quality', [None])[0] or fmt.get('quality')
+ quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
+
+ tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
+ or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
+ fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
more_fields = {
'filesize': filesize,
- 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+ 'tbr': tbr,
'width': width,
'height': height,
- 'fps': int_or_none(url_data.get('fps', [None])[0]),
- 'format_note': url_data.get('quality_label', [None])[0] or quality,
- 'quality': q(quality),
+ 'fps': fps,
+ 'format_note': quality_label or quality,
}
for key, value in more_fields.items():
if value:
dct[key] = value
- type_ = url_data.get('type', [None])[0]
+ type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
if type_:
type_split = type_.split(';')
kind_ext = type_split[0].split('/')
page, 'title', default=None)
_UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
- uploader = self._search_regex(
+ uploader = self._html_search_regex(
r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
page, 'uploader', default=None)
mobj = re.search(
class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
+ _GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
from __future__ import unicode_literals
-__version__ = '2019.09.01'
+__version__ = '2019.09.28'