X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9ed7fe4fe4c445eb7d9f3197bb300d0db8f1807a..9fb40a4e5f59ba243dee2edad23a2c5555338113:/youtube_dl/YoutubeDL.py diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9a8c7da..f725456 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1,10 +1,11 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import absolute_import, unicode_literals import collections import contextlib +import copy import datetime import errno import fileinput @@ -23,9 +24,7 @@ import sys import time import tokenize import traceback - -if os.name == 'nt': - import ctypes +import random from .compat import ( compat_basestring, @@ -34,6 +33,8 @@ from .compat import ( compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_numeric_types, + compat_os_name, compat_str, compat_tokenize_tokenize, compat_urllib_error, @@ -41,29 +42,40 @@ from .compat import ( compat_urllib_request_DataHandler, ) from .utils import ( + age_restricted, + args_to_str, ContentTooShortError, date_from_str, DateRange, DEFAULT_OUTTMPL, determine_ext, + determine_protocol, DownloadError, + encode_compat_str, encodeFilename, + error_to_compat_str, ExtractorError, format_bytes, formatSeconds, + GeoRestrictedError, + ISO3166Utils, locked_file, make_HTTPS_handler, MaxDownloadsReached, PagedList, parse_filesize, PerRequestProxyHandler, - PostProcessingError, platform_name, + PostProcessingError, preferredencoding, + prepend_extension, + register_socks_protocols, render_table, + replace_extension, SameFileError, sanitize_filename, sanitize_path, + sanitize_url, sanitized_Request, std_headers, subtitles_filename, @@ -74,16 +86,13 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, - prepend_extension, - replace_extension, - args_to_str, - age_restricted, ) from .cache import Cache -from .extractor import get_info_extractor, gen_extractors +from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM3u8PP, FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegMergerPP, @@ -92,6 +101,9 @@ from .postprocessor import ( ) from .version import __version__ +if compat_os_name == 'nt': + import ctypes + class YoutubeDL(object): """YoutubeDL class. @@ -123,6 +135,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. + ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. @@ -148,6 +163,7 @@ class YoutubeDL(object): playlistend: Playlist item to end at. playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. + playlistrandom: Download playlist items in random order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. @@ -189,8 +205,8 @@ class YoutubeDL(object): prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use - cn_verification_proxy: URL of the proxy to use for IP address verification - on Chinese sites. (Experimental) + geo_verification_proxy: URL of the proxy to use for IP address verification + on geo-restricted sites. (Experimental) socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -241,7 +257,16 @@ class YoutubeDL(object): source_address: (Experimental) Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. - sleep_interval: Number of seconds to sleep before each download. + sleep_interval: Number of seconds to sleep before each download when + used alone or a lower bound of a range for randomized + sleep before each download (minimum possible number + of seconds to sleep) when used along with + max_sleep_interval. + max_sleep_interval:Upper bound of a range for randomized sleep before each + download (maximum possible number of seconds to sleep). + Must only be used along with sleep_interval. + Actual sleep time will be a random float from range + [sleep_interval; max_sleep_interval]. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. match_filter: A function that gets called with the info_dict of @@ -250,17 +275,25 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. + geo_bypass: Bypass geographic restriction via faking X-Forwarded-For + HTTP header (experimental) + geo_bypass_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing via faking + X-Forwarded-For HTTP header (experimental) The following options determine which downloader is picked: external_downloader: Executable of the external downloader to call. None or unset for standard (built-in) downloader. - hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. + hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv + if True, otherwise use ffmpeg/avconv if False, otherwise + use downloader suggested by extractor if None. The following parameters are not used by YoutubeDL itself, they are used by the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args. + xattr_set_filesize, external_downloader_args, hls_use_mpegts. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -295,6 +328,21 @@ class YoutubeDL(object): self.params.update(params) self.cache = Cache(self) + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning( + '%s is deprecated. Use %s instead.' % (option, suggestion)) + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): + if self.params.get('geo_verification_proxy') is None: + self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + + check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + if params.get('bidi_workaround', False): try: import pty @@ -317,7 +365,7 @@ class YoutubeDL(object): ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: - if ose.errno == 2: + if ose.errno == errno.ENOENT: self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise @@ -353,6 +401,8 @@ class YoutubeDL(object): for ph in self.params.get('progress_hooks', []): self.add_progress_hook(ph) + register_socks_protocols() + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ @@ -372,8 +422,9 @@ class YoutubeDL(object): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) - self._ies_instances[ie.ie_key()] = ie - ie.set_downloader(self) + if not isinstance(ie, type): + self._ies_instances[ie.ie_key()] = ie + ie.set_downloader(self) def get_info_extractor(self, ie_key): """ @@ -391,7 +442,7 @@ class YoutubeDL(object): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ - for ie in gen_extractors(): + for ie in gen_extractor_classes(): self.add_info_extractor(ie) def add_post_processor(self, pp): @@ -447,7 +498,7 @@ class YoutubeDL(object): def to_console_title(self, message): if not self.params.get('consoletitle', False): return - if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) @@ -495,7 +546,7 @@ class YoutubeDL(object): tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += compat_str(traceback.format_exc()) + tb += encode_compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) @@ -518,7 +569,7 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' @@ -530,7 +581,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' @@ -553,28 +604,74 @@ class YoutubeDL(object): autonumber_size = self.params.get('autonumber_size') if autonumber_size is None: autonumber_size = 5 - autonumber_templ = '%0' + str(autonumber_size) + 'd' - template_dict['autonumber'] = autonumber_templ % self._num_downloads - if template_dict.get('playlist_index') is not None: - template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) + template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads if template_dict.get('resolution') is None: if template_dict.get('width') and template_dict.get('height'): template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) elif template_dict.get('height'): template_dict['resolution'] = '%sp' % template_dict['height'] elif template_dict.get('width'): - template_dict['resolution'] = '?x%d' % template_dict['width'] + template_dict['resolution'] = '%dx?' % template_dict['width'] sanitize = lambda k, v: sanitize_filename( compat_str(v), restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) - template_dict = dict((k, sanitize(k, v)) + template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) for k, v in template_dict.items() - if v is not None) + if v is not None and not isinstance(v, (list, tuple, dict))) template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + + # For fields playlist_index and autonumber convert all occurrences + # of %(field)s to %(field)0Nd for backward compatibility + field_size_compat_map = { + 'playlist_index': len(str(template_dict['n_entries'])), + 'autonumber': autonumber_size, + } + FIELD_SIZE_COMPAT_RE = r'(?autonumber|playlist_index)\)s' + mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl) + if mobj: + outtmpl = re.sub( + FIELD_SIZE_COMPAT_RE, + r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], + outtmpl) + + NUMERIC_FIELDS = set(( + 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', + 'upload_year', 'upload_month', 'upload_day', + 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', + 'average_rating', 'comment_count', 'age_limit', + 'start_time', 'end_time', + 'chapter_number', 'season_number', 'episode_number', + 'track_number', 'disc_number', 'release_year', + 'playlist_index', + )) + + # Missing numeric fields used together with integer presentation types + # in format specification will break the argument substitution since + # string 'NA' is returned for missing fields. We will patch output + # template for missing fields to meet string presentation type. + for numeric_field in NUMERIC_FIELDS: + if numeric_field not in template_dict: + # As of [1] format syntax is: + # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type + # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting + FORMAT_RE = r'''(?x) + (?ext|acodec|vcodec|container|protocol) + \s*(?Pext|acodec|vcodec|container|protocol|format_id) \s*(?P%s)(?P\s*\?)? - \s*(?P[a-zA-Z0-9_-]+) + \s*(?P[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) @@ -1030,9 +1148,9 @@ class YoutubeDL(object): if isinstance(selector, list): fs = [_build_selector_function(s) for s in selector] - def selector_function(formats): + def selector_function(ctx): for f in fs: - for format in f(formats): + for format in f(ctx): yield format return selector_function elif selector.type == GROUP: @@ -1040,17 +1158,17 @@ class YoutubeDL(object): elif selector.type == PICKFIRST: fs = [_build_selector_function(s) for s in selector.selector] - def selector_function(formats): + def selector_function(ctx): for f in fs: - picked_formats = list(f(formats)) + picked_formats = list(f(ctx)) if picked_formats: return picked_formats return [] elif selector.type == SINGLE: format_spec = selector.selector - def selector_function(formats): - formats = list(formats) + def selector_function(ctx): + formats = list(ctx['formats']) if not formats: return if format_spec == 'all': @@ -1063,9 +1181,10 @@ class YoutubeDL(object): if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: yield audiovideo_formats[format_idx] - # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format - elif (all(f.get('acodec') != 'none' for f in formats) or - all(f.get('vcodec') != 'none' for f in formats)): + # for extractors with incomplete formats (audio only (soundcloud) + # or video only (imgur)) we will fallback to best/worst + # {video,audio}-only format + elif ctx['incomplete_formats']: yield formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ @@ -1110,6 +1229,12 @@ class YoutubeDL(object): 'contain the video, try using ' '"-f %s+%s"' % (format_2, format_1)) return + # Formats must be opposite (video+audio) + if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none': + self.report_error( + 'Both formats %s and %s are video-only, you must specify "-f video+audio"' + % (format_1, format_2)) + return output_ext = ( formats_info[0]['ext'] if self.params.get('merge_output_format') is None @@ -1133,17 +1258,18 @@ class YoutubeDL(object): } video_selector, audio_selector = map(_build_selector_function, selector.selector) - def selector_function(formats): - formats = list(formats) - for pair in itertools.product(video_selector(formats), audio_selector(formats)): + def selector_function(ctx): + for pair in itertools.product( + video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): yield _merge(pair) filters = [self._build_format_filter(f) for f in selector.filters] - def final_selector(formats): + def final_selector(ctx): + ctx_copy = copy.deepcopy(ctx) for _filter in filters: - formats = list(filter(_filter, formats)) - return selector_function(formats) + ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) + return selector_function(ctx_copy) return final_selector stream = io.BytesIO(format_spec.encode('utf-8')) @@ -1186,6 +1312,11 @@ class YoutubeDL(object): if cookies: res['Cookie'] = cookies + if 'X-Forwarded-For' not in res: + x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') + if x_forwarded_for_ip: + res['X-Forwarded-For'] = x_forwarded_for_ip + return res def _calc_cookies(self, info_dict): @@ -1201,6 +1332,10 @@ class YoutubeDL(object): if 'title' not in info_dict: raise ExtractorError('Missing "title" field in extractor result') + if not isinstance(info_dict['id'], compat_str): + self.report_warning('"id" field is not a string - forcing string conversion') + info_dict['id'] = compat_str(info_dict['id']) + if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None @@ -1213,15 +1348,25 @@ class YoutubeDL(object): info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('preference'), t.get('width'), t.get('height'), - t.get('id'), t.get('url'))) + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', t.get('url'))) for i, t in enumerate(thumbnails): + t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: t['id'] = '%d' % i - if thumbnails and 'thumbnail' not in info_dict: + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return + + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if 'display_id' not in info_dict and 'id' in info_dict: @@ -1236,11 +1381,19 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. + for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + subtitles = info_dict.get('subtitles') if subtitles: for _, subtitle in subtitles.items(): for subtitle_format in subtitle: - if 'ext' not in subtitle_format: + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if subtitle_format.get('ext') is None: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() if self.params.get('listsubtitles', False): @@ -1269,8 +1422,13 @@ class YoutubeDL(object): if 'url' not in format: raise ExtractorError('Missing "url" key in result (index %d)' % i) + format['url'] = sanitize_url(format['url']) + if format.get('format_id') is None: format['format_id'] = compat_str(i) + else: + # Sanitize format_id from characters used in format selector expression + format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) format_id = format['format_id'] if format_id not in formats_dict: formats_dict[format_id] = [] @@ -1290,13 +1448,20 @@ class YoutubeDL(object): note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing - if 'ext' not in format: + if format.get('ext') is None: format['ext'] = determine_ext(format['url']).lower() + # Automatically determine protocol if missing (useful for format + # selection purposes) + if format.get('protocol') is None: + format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() full_format_info.update(format) format['http_headers'] = self._calc_headers(full_format_info) + # Remove private housekeeping stuff + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] # TODO Central sorting goes here @@ -1304,20 +1469,16 @@ class YoutubeDL(object): # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, - # wich can't be exported to json + # which can't be exported to json info_dict['formats'] = formats if self.params.get('listformats'): self.list_formats(info_dict) return - if self.params.get('list_thumbnails'): - self.list_thumbnails(info_dict) - return req_format = self.params.get('format') if req_format is None: req_format_list = [] if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - info_dict['extractor'] in ['youtube', 'ted'] and not info_dict.get('is_live')): merger = FFmpegMergerPP(self) if merger.available and merger.can_merge(): @@ -1325,7 +1486,34 @@ class YoutubeDL(object): req_format_list.append('best') req_format = '/'.join(req_format_list) format_selector = self.build_format_selector(req_format) - formats_to_download = list(format_selector(formats)) + + # While in format selection we may need to have an access to the original + # format set in order to calculate some metrics or do some processing. + # For now we need to be able to guess whether original formats provided + # by extractor are incomplete or not (i.e. whether extractor provides only + # video-only or audio-only formats) for proper formats selection for + # extractors with such incomplete formats (see + # https://github.com/rg3/youtube-dl/pull/5556). + # Since formats may be filtered during format selection and may not match + # the original formats the results may be incorrect. Thus original formats + # or pre-calculated metrics should be passed to format selection routines + # as well. + # We will pass a context object containing all necessary additional data + # instead of just formats. + # This fixes incorrect format selection issue (see + # https://github.com/rg3/youtube-dl/issues/10083). + incomplete_formats = ( + # All formats are video-only or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or + # all formats are audio-only + all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) + + ctx = { + 'formats': formats, + 'incomplete_formats': incomplete_formats, + } + + formats_to_download = list(format_selector(ctx)) if not formats_to_download: raise ExtractorError('requested format not available', expected=True) @@ -1453,7 +1641,7 @@ class YoutubeDL(object): if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: - self.report_error('unable to create directory ' + compat_str(err)) + self.report_error('unable to create directory ' + error_to_compat_str(err)) return if self.params.get('writedescription', False): @@ -1504,7 +1692,7 @@ class YoutubeDL(object): sub_info['url'], info_dict['id'], note=False) except ExtractorError as err: self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, compat_str(err.cause))) + (sub_lang, error_to_compat_str(err.cause))) continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -1512,7 +1700,9 @@ class YoutubeDL(object): self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/rg3/youtube-dl/issues/10268 + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_data) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) @@ -1560,7 +1750,7 @@ class YoutubeDL(object): video_ext, audio_ext = audio.get('ext'), video.get('ext') if video_ext and audio_ext: COMPATIBLE_EXTS = ( - ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), + ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), ('webm') ) for exts in COMPATIBLE_EXTS: @@ -1600,7 +1790,7 @@ class YoutubeDL(object): # Just a single file success = dl(filename, info_dict) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_error('unable to download video data: %s' % str(err)) + self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return except (OSError, IOError) as err: raise UnavailableVideoError(err) @@ -1608,12 +1798,14 @@ class YoutubeDL(object): self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - if success: + if success and filename != '-': # Fixup content fixup_policy = self.params.get('fixup') if fixup_policy is None: fixup_policy = 'detect_or_warn' + INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.' + stretched_ratio = info_dict.get('stretched_ratio') if stretched_ratio is not None and stretched_ratio != 1: if fixup_policy == 'warn': @@ -1626,15 +1818,18 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(stretched_pp) else: self.report_warning( - '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'], stretched_ratio)) + '%s: Non-uniform pixel ratio (%s). %s' + % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') - if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if (info_dict.get('requested_formats') is None and + info_dict.get('container') == 'm4a_dash'): if fixup_policy == 'warn': - self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( - info_dict['id'])) + self.report_warning( + '%s: writing DASH m4a. ' + 'Only some players support this container.' + % info_dict['id']) elif fixup_policy == 'detect_or_warn': fixup_pp = FFmpegFixupM4aPP(self) if fixup_pp.available: @@ -1642,8 +1837,27 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(fixup_pp) else: self.report_warning( - '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'])) + '%s: writing DASH m4a. ' + 'Only some players support this container. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) + else: + assert fixup_policy in ('ignore', 'never') + + if (info_dict.get('protocol') == 'm3u8_native' or + info_dict.get('protocol') == 'm3u8' and + self.params.get('hls_prefer_native')): + if fixup_policy == 'warn': + self.report_warning('%s: malformated aac bitstream.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM3u8PP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: malformated aac bitstream. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') @@ -1774,7 +1988,7 @@ class YoutubeDL(object): else: res = '%sp' % format['height'] elif format.get('width') is not None: - res = '?x%d' % format['width'] + res = '%dx?' % format['width'] else: res = default return res @@ -1783,6 +1997,10 @@ class YoutubeDL(object): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) ' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s] ' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: @@ -1803,7 +2021,9 @@ class YoutubeDL(object): if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('fps') is not None: - res += ', %sfps' % fdict['fps'] + if res: + res += ', ' + res += '%sfps' % fdict['fps'] if fdict.get('acodec') is not None: if res: res += ', ' @@ -1846,13 +2066,8 @@ class YoutubeDL(object): def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if not thumbnails: - tn_url = info_dict.get('thumbnail') - if tn_url: - thumbnails = [{'id': '0', 'url': tn_url}] - else: - self.to_screen( - '[info] No thumbnails present for %s' % info_dict['id']) - return + self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) + return self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) @@ -1897,6 +2112,8 @@ class YoutubeDL(object): write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') + if _LAZY_LOADER: + self._write_string('[debug] Lazy loading extractors enabled' + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -1952,6 +2169,7 @@ class YoutubeDL(object): if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: + opts_cookiefile = compat_expanduser(opts_cookiefile) self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): @@ -1974,8 +2192,19 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') + file_handler.file_open = file_open + opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler) + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play @@ -2033,4 +2262,4 @@ class YoutubeDL(object): (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download thumbnail "%s": %s' % - (t['url'], compat_str(err))) + (t['url'], error_to_compat_str(err)))