X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/453698570f26bebd37b39df8537d993b57d77b8b..d4ff594119bc679aa175947eb59a97bee8f966f4:/youtube_dl/YoutubeDL.py diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 772fddd..9605f8f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -10,6 +10,7 @@ import io import itertools import json import locale +import operator import os import platform import re @@ -24,6 +25,7 @@ if os.name == 'nt': import ctypes from .compat import ( + compat_basestring, compat_cookiejar, compat_expanduser, compat_http_client, @@ -49,11 +51,14 @@ from .utils import ( make_HTTPS_handler, MaxDownloadsReached, PagedList, + parse_filesize, PostProcessingError, platform_name, preferredencoding, + render_table, SameFileError, sanitize_filename, + std_headers, subtitles_filename, takewhile_inclusive, UnavailableVideoError, @@ -71,6 +76,7 @@ from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegMergerPP, FFmpegPostProcessor, @@ -132,6 +138,7 @@ class YoutubeDL(object): nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. 
@@ -141,6 +148,7 @@ class YoutubeDL(object): writeinfojson: Write the video description to a .info.json file writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file + write_all_thumbnails: Write all thumbnail formats to files writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video @@ -191,11 +199,12 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * filename: The final filename - * status: One of "downloading" and "finished" - - The dict may also have some of the following entries: + * status: One of "downloading" and "finished". + Check this first and ignore unknown values. + If status is one of "downloading" or "finished", the + following properties may also be present: + * filename: The final filename (always present) * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown * tmpfilename: The filename we're currently writing to @@ -211,16 +220,21 @@ class YoutubeDL(object): - "never": do nothing - "warn": only emit a warning - "detect_or_warn": check whether we can do anything - about it, warn otherwise + about it, warn otherwise (default) source_address: (Experimental) Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. + sleep_interval: Number of seconds to sleep before each download. + external_downloader: Executable of the external downloader to call. + listformats: Print an overview of available video formats and exit. + list_thumbnails: Print a table of all thumbnails and exit. 
The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, - noresizebuffer, retries, continuedl, noprogress, consoletitle + noresizebuffer, retries, continuedl, noprogress, consoletitle, + xattr_set_filesize. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -530,6 +544,11 @@ class YoutubeDL(object): outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict + # Temporary fix for #4787 + # 'Treat' all problem characters by passing filename through preferredencoding + # to workaround encoding issues with subprocess on python2 @ Windows + if sys.version_info < (3, 0) and sys.platform == 'win32': + filename = encodeFilename(filename, True).decode(preferredencoding()) return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') @@ -693,24 +712,51 @@ class YoutubeDL(object): if playlistend == -1: playlistend = None + playlistitems_str = self.params.get('playlist_items', None) + playlistitems = None + if playlistitems_str is not None: + def iter_playlistitems(format): + for string_segment in format.split(','): + if '-' in string_segment: + start, end = string_segment.split('-') + for item in range(int(start), int(end) + 1): + yield int(item) + else: + yield int(string_segment) + playlistitems = iter_playlistitems(playlistitems_str) + ie_entries = ie_result['entries'] if isinstance(ie_entries, list): n_all_entries = len(ie_entries) - entries = ie_entries[playliststart:playlistend] + if playlistitems: + entries = [ie_entries[i - 1] for i in playlistitems] + else: + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % 
(ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): - entries = ie_entries.getslice( - playliststart, playlistend) + if playlistitems: + entries = [] + for item in playlistitems: + entries.extend(ie_entries.getslice( + item - 1, item + )) + else: + entries = ie_entries.getslice( + playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) else: # iterable - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) + if playlistitems: + entry_list = list(ie_entries) + entries = [entry_list[i - 1] for i in playlistitems] + else: + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % @@ -768,7 +814,59 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def _apply_format_filter(self, format_spec, available_formats): + " Returns a tuple of the remaining format_spec and filtered formats " + + OPERATORS = { + '<': operator.lt, + '<=': operator.le, + '>': operator.gt, + '>=': operator.ge, + '=': operator.eq, + '!=': operator.ne, + } + operator_rex = re.compile(r'''(?x)\s*\[ + (?P<key>width|height|tbr|abr|vbr|filesize|fps) + \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* + (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) 
+ \]$ + ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + m = operator_rex.search(format_spec) + if not m: + raise ValueError('Invalid format specification %r' % format_spec) + + try: + comparison_value = int(m.group('value')) + except ValueError: + comparison_value = parse_filesize(m.group('value')) + if comparison_value is None: + comparison_value = parse_filesize(m.group('value') + 'B') + if comparison_value is None: + raise ValueError( + 'Invalid value %r in format specification %r' % ( + m.group('value'), format_spec)) + op = OPERATORS[m.group('op')] + + def _filter(f): + actual_value = f.get(m.group('key')) + if actual_value is None: + return m.group('none_inclusive') + return op(actual_value, comparison_value) + new_formats = [f for f in available_formats if _filter(f)] + + new_format_spec = format_spec[:-len(m.group(0))] + if not new_format_spec: + new_format_spec = 'best' + + return (new_format_spec, new_formats) + def select_format(self, format_spec, available_formats): + while format_spec.endswith(']'): + format_spec, available_formats = self._apply_format_filter( + format_spec, available_formats) + if not available_formats: + return None + if format_spec == 'best' or format_spec is None: return available_formats[-1] elif format_spec == 'worst': @@ -808,6 +906,42 @@ class YoutubeDL(object): return matches[-1] return None + def _calc_headers(self, info_dict): + res = std_headers.copy() + + add_headers = info_dict.get('http_headers') + if add_headers: + res.update(add_headers) + + cookies = self._calc_cookies(info_dict) + if cookies: + res['Cookie'] = cookies + + return res + + def _calc_cookies(self, info_dict): + class _PseudoRequest(object): + def __init__(self, url): + self.url = url + self.headers = {} + self.unverifiable = False + + def add_unredirected_header(self, k, v): + self.headers[k] = v + + def get_full_url(self): + return self.url + + def is_unverifiable(self): + return self.unverifiable + + def has_header(self, h): + return h in 
self.headers + + pr = _PseudoRequest(info_dict['url']) + self.cookiejar.add_cookie_header(pr) + return pr.headers.get('Cookie') + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -822,12 +956,19 @@ class YoutubeDL(object): info_dict['playlist_index'] = None thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('width'), t.get('height'), t.get('url'))) - for t in thumbnails: + t.get('preference'), t.get('width'), t.get('height'), + t.get('id'), t.get('url'))) + for i, t in enumerate(thumbnails): if 'width' in t and 'height' in t: t['resolution'] = '%dx%d' % (t['width'], t['height']) + if t.get('id') is None: + t['id'] = '%d' % i if thumbnails and 'thumbnail' not in info_dict: info_dict['thumbnail'] = thumbnails[-1]['url'] @@ -876,6 +1017,11 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Add HTTP headers, so that external programs can use them from the + # json output + full_format_info = info_dict.copy() + full_format_info.update(format) + format['http_headers'] = self._calc_headers(full_format_info) format_limit = self.params.get('format_limit', None) if format_limit: @@ -891,9 +1037,12 @@ class YoutubeDL(object): # element in the 'formats' field in info_dict is info_dict itself, # wich can't be exported to json info_dict['formats'] = formats - if self.params.get('listformats', None): + if self.params.get('listformats'): self.list_formats(info_dict) return + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return req_format = self.params.get('format') if req_format is None: @@ -927,8 +1076,10 @@ class YoutubeDL(object): else self.params['merge_output_format']) selected_format = { 
'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], + 'format': '%s+%s' % (formats_info[0].get('format'), + formats_info[1].get('format')), + 'format_id': '%s+%s' % (formats_info[0].get('format_id'), + formats_info[1].get('format_id')), 'width': formats_info[0].get('width'), 'height': formats_info[0].get('height'), 'resolution': formats_info[0].get('resolution'), @@ -989,7 +1140,7 @@ class YoutubeDL(object): self._num_downloads += 1 - filename = self.prepare_filename(info_dict) + info_dict['_filename'] = filename = self.prepare_filename(info_dict) # Forced printings if self.params.get('forcetitle', False): @@ -1014,10 +1165,7 @@ class YoutubeDL(object): if self.params.get('forceformat', False): self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): - info_dict['_filename'] = filename self.to_stdout(json.dumps(info_dict)) - if self.params.get('dump_single_json', False): - info_dict['_filename'] = filename # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -1100,35 +1248,18 @@ class YoutubeDL(object): self.report_error('Cannot write metadata to JSON file ' + infofn) return - if self.params.get('writethumbnail', False): - if info_dict.get('thumbnail') is not None: - thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') - thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen('[%s] %s: Thumbnail is already present' % - (info_dict['extractor'], info_dict['id'])) - else: - self.to_screen('[%s] %s: Downloading thumbnail ...' 
% - (info_dict['extractor'], info_dict['id'])) - try: - uf = self.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen('[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning('Unable to download thumbnail "%s": %s' % - (info_dict['thumbnail'], compat_str(err))) + self._write_thumbnails(info_dict, filename) if not self.params.get('skip_download', False): try: def dl(name, info): - fd = get_suitable_downloader(info)(self, self.params) + fd = get_suitable_downloader(info, self.params)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) + if info_dict.get('requested_formats') is not None: downloaded = [] success = True @@ -1164,11 +1295,12 @@ class YoutubeDL(object): if success: # Fixup content + fixup_policy = self.params.get('fixup') + if fixup_policy is None: + fixup_policy = 'detect_or_warn' + stretched_ratio = info_dict.get('stretched_ratio') if stretched_ratio is not None and stretched_ratio != 1: - fixup_policy = self.params.get('fixup') - if fixup_policy is None: - fixup_policy = 'detect_or_warn' if fixup_policy == 'warn': self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( info_dict['id'], stretched_ratio)) @@ -1182,7 +1314,23 @@ class YoutubeDL(object): '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( info_dict['id'], stretched_ratio)) else: - assert fixup_policy == 'ignore' + assert fixup_policy in ('ignore', 'never') + + if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if fixup_policy == 'warn': + self.report_warning('%s: writing DASH m4a. 
Only some players support this container.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM4aPP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( + info_dict['id'])) + else: + assert fixup_policy in ('ignore', 'never') try: self.post_process(filename, info_dict) @@ -1384,8 +1532,26 @@ class YoutubeDL(object): header_line = line({ 'format_id': 'format code', 'ext': 'extension', 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) - self.to_screen('[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + self.to_screen( + '[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, '\n'.join(formats_s))) + + def list_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if not thumbnails: + tn_url = info_dict.get('thumbnail') + if tn_url: + thumbnails = [{'id': '0', 'url': tn_url}] + else: + self.to_screen( + '[info] No thumbnails present for %s' % info_dict['id']) + return + + self.to_screen( + '[info] Thumbnails for %s:' % info_dict['id']) + self.to_screen(render_table( + ['ID', 'width', 'height', 'URL'], + [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) def urlopen(self, req): """ Start an HTTP download """ @@ -1396,7 +1562,7 @@ class YoutubeDL(object): # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) # To work around aforementioned issue we will replace request's original URL with # percent-encoded one - req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) + req_is_string = isinstance(req, compat_basestring) url = req if req_is_string else req.get_full_url() url_escaped = 
escape_url(url) @@ -1531,3 +1697,39 @@ class YoutubeDL(object): if encoding is None: encoding = preferredencoding() return encoding + + def _write_thumbnails(self, info_dict, filename): + if self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') + if thumbnails: + thumbnails = [thumbnails[-1]] + elif self.params.get('write_all_thumbnails', False): + thumbnails = info_dict.get('thumbnails') + else: + return + + if not thumbnails: + # No thumbnails present, so return immediately + return + + for t in thumbnails: + thumb_ext = determine_ext(t['url'], 'jpg') + suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' + thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' + thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): + self.to_screen('[%s] %s: Thumbnail %sis already present' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + else: + self.to_screen('[%s] %s: Downloading thumbnail %s...' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + try: + uf = self.urlopen(t['url']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % + (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning('Unable to download thumbnail "%s": %s' % + (t['url'], compat_str(err)))