from __future__ import absolute_import, unicode_literals
import collections
+import contextlib
import datetime
import errno
+import fileinput
import io
import itertools
import json
import locale
+import operator
import os
import platform
import re
import ctypes
from .compat import (
+ compat_basestring,
compat_cookiejar,
compat_expanduser,
+ compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_str,
ExtractorError,
format_bytes,
formatSeconds,
- get_term_width,
+ HEADRequest,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PagedList,
+ parse_filesize,
+ PerRequestProxyHandler,
PostProcessingError,
platform_name,
preferredencoding,
+ render_table,
SameFileError,
sanitize_filename,
+ sanitize_path,
+ std_headers,
subtitles_filename,
- takewhile_inclusive,
UnavailableVideoError,
url_basename,
version_tuple,
write_string,
YoutubeDLHandler,
prepend_extension,
+ replace_extension,
args_to_str,
age_restricted,
)
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
+ FFmpegFixupM4aPP,
FFmpegFixupStretchedPP,
FFmpegMergerPP,
FFmpegPostProcessor,
username: Username for authentication purposes.
password: Password for authentication purposes.
- videopassword: Password for acces a video.
+ videopassword: Password for accessing a video.
usenetrc: Use netrc for authentication instead.
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
(or video) as a single JSON line.
simulate: Do not download the video files.
format: Video format code. See options.py for more information.
- format_limit: Highest quality format to try.
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
+ force_generic_extractor: Force downloader to use the generic extractor
nooverwrites: Prevent overwriting files.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
+ playlist_items: Specific indices of playlist to download.
playlistreverse: Download playlist items in reverse order.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
writeinfojson: Write the video description to a .info.json file
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
+ write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
+ subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
+ cn_verification_proxy: URL of the proxy to use for IP address verification
+ on Chinese sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
- * filename: The final filename
- * status: One of "downloading" and "finished"
-
- The dict may also have some of the following entries:
+ * status: One of "downloading", "error", or "finished".
+ Check this first and ignore unknown values.
+ If status is one of "downloading" or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
- * tmpfilename: The filename we're currently writing to
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if
unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
- "never": do nothing
- "warn": only emit a warning
- "detect_or_warn": check whether we can do anything
- about it, warn otherwise
+ about it, warn otherwise (default)
source_address: (Experimental) Client-side IP address to bind to.
call_home: Boolean, true iff we are allowed to contact the
youtube-dl servers for debugging.
-
+ sleep_interval: Number of seconds to sleep before each download.
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example for this.
+ no_color: Do not emit color codes in output.
+
+ The following options determine which downloader is picked:
+ external_downloader: Executable of the external downloader to call.
+ None or unset for standard (built-in) downloader.
+ hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
- the FileDownloader:
+ the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
- noresizebuffer, retries, continuedl, noprogress, consoletitle
+ noresizebuffer, retries, continuedl, noprogress, consoletitle,
+ xattr_set_filesize, external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
- exec_cmd: Arbitrary command to run after downloading
+ postprocessor_args: A list of additional command-line arguments for the
+ postprocessor.
"""
params = None
try:
import pty
master, slave = pty.openpty()
- width = get_term_width()
+ width = compat_get_terminal_size().columns
if width is None:
width_args = []
else:
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
+ sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+ not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- if '%(stitle)s' in self.params.get('outtmpl', ''):
- self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
self._setup_opener()
else:
if self.params.get('no_warnings'):
return
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
+ # Temporary fix for #4787
+ # 'Treat' all problem characters by passing filename through preferredencoding
+ # to workaround encoding issues with subprocess on python2 @ Windows
+ if sys.version_info < (3, 0) and sys.platform == 'win32':
+ filename = encodeFilename(filename, True).decode(preferredencoding())
return filename
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def _match_entry(self, info_dict):
+ def _match_entry(self, info_dict, incomplete):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % title
+ return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
return None
@staticmethod
info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={},
- process=True):
+ process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
- '''
+ '''
+
+ if not ie_key and force_generic_extractor:
+ ie_key = 'Generic'
if ie_key:
ies = [self.get_info_extractor(ie_key)]
if playlistend == -1:
playlistend = None
+ playlistitems_str = self.params.get('playlist_items', None)
+ playlistitems = None
+ if playlistitems_str is not None:
def iter_playlistitems(format):
    """Yield the playlist indices named by a spec such as '1-3,7'.

    format is a comma-separated list whose segments are either a single
    integer or an inclusive 'start-end' range. Indices are yielded as
    ints, in the order they are written. Raises ValueError (from int() or
    from unpacking) when a segment is not of one of these two shapes.
    """
    for segment in format.split(','):
        if '-' not in segment:
            yield int(segment)
            continue
        # 'start-end' denotes an inclusive range of indices
        first, last = segment.split('-')
        for index in range(int(first), int(last) + 1):
            yield index
+ playlistitems = iter_playlistitems(playlistitems_str)
+
ie_entries = ie_result['entries']
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
- entries = ie_entries[playliststart:playlistend]
+ if playlistitems:
+ entries = [
+ ie_entries[i - 1] for i in playlistitems
+ if -n_all_entries <= i - 1 < n_all_entries]
+ else:
+ entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
elif isinstance(ie_entries, PagedList):
- entries = ie_entries.getslice(
- playliststart, playlistend)
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
else: # iterable
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
+ if playlistitems:
+ entry_list = list(ie_entries)
+ entries = [entry_list[i - 1] for i in playlistitems]
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
'extractor_key': ie_result['extractor_key'],
}
- reason = self._match_entry(entry)
+ reason = self._match_entry(entry, incomplete=True)
if reason is not None:
self.to_screen('[download] ' + reason)
continue
else:
raise Exception('Invalid result type: %s' % result_type)
+ def _apply_format_filter(self, format_spec, available_formats):
+ " Returns a tuple of the remaining format_spec and filtered formats "
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*\[
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+ \]$
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.search(format_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), format_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*\[
+ \s*(?P<key>ext|acodec|vcodec|container|protocol)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9_-]+)
+ \s*\]$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(format_spec)
+ if m:
+ comparison_value = m.group('value')
+ op = STR_OPERATORS[m.group('op')]
+
+ if not m:
+ raise ValueError('Invalid format specification %r' % format_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ new_formats = [f for f in available_formats if _filter(f)]
+
+ new_format_spec = format_spec[:-len(m.group(0))]
+ if not new_format_spec:
+ new_format_spec = 'best'
+
+ return (new_format_spec, new_formats)
+
def select_format(self, format_spec, available_formats):
- if format_spec == 'best' or format_spec is None:
- return available_formats[-1]
- elif format_spec == 'worst':
- return available_formats[0]
+ while format_spec.endswith(']'):
+ format_spec, available_formats = self._apply_format_filter(
+ format_spec, available_formats)
+ if not available_formats:
+ return None
+
+ if format_spec in ['best', 'worst', None]:
+ format_idx = 0 if format_spec == 'worst' else -1
+ audiovideo_formats = [
+ f for f in available_formats
+ if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+ if audiovideo_formats:
+ return audiovideo_formats[format_idx]
+ # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
+ elif (all(f.get('acodec') != 'none' for f in available_formats) or
+ all(f.get('vcodec') != 'none' for f in available_formats)):
+ return available_formats[format_idx]
elif format_spec == 'bestaudio':
audio_formats = [
f for f in available_formats
return matches[-1]
return None
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the URL described by info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    when present, and sets 'Cookie' to whatever self._calc_cookies()
    returns for info_dict, if that is non-empty.
    """
    headers = std_headers.copy()
    # Per-format/extractor headers take precedence over the defaults
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    return headers
+
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header self.cookiejar attaches for info_dict['url'].

    A throwaway request object is built only so the cookie jar can add
    its header to it; nothing is sent over the network here.
    """
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
info_dict['playlist_index'] = None
thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
if thumbnails:
thumbnails.sort(key=lambda t: (
- t.get('width'), t.get('height'), t.get('url')))
- for t in thumbnails:
- if 'width' in t and 'height' in t:
+ t.get('preference'), t.get('width'), t.get('height'),
+ t.get('id'), t.get('url')))
+ for i, t in enumerate(thumbnails):
+ if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ if t.get('id') is None:
+ t['id'] = '%d' % i
if thumbnails and 'thumbnail' not in info_dict:
info_dict['thumbnail'] = thumbnails[-1]['url']
info_dict['display_id'] = info_dict['id']
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around negative timestamps in Windows
- # (see http://bugs.python.org/issue1646728)
- if info_dict['timestamp'] < 0 and os.name == 'nt':
- info_dict['timestamp'] = 0
- upload_date = datetime.datetime.utcfromtimestamp(
- info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-
- # This extractors handle format selection themselves
- if info_dict['extractor'] in ['Youku']:
- if download:
- self.process_info(info_dict)
- return info_dict
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+ info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
+
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ return
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
# We now pick which formats have to be downloaded
if info_dict.get('formats') is None:
if not formats:
raise ExtractorError('No video formats found!')
+ formats_dict = {}
+
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
if 'url' not in format:
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
+ format_id = format['format_id']
+ if format_id not in formats_dict:
+ formats_dict[format_id] = []
+ formats_dict[format_id].append(format)
+
+ # Make sure all formats have unique format_id
+ for format_id, ambiguous_formats in formats_dict.items():
+ if len(ambiguous_formats) > 1:
+ for i, format in enumerate(ambiguous_formats):
+ format['format_id'] = '%s-%d' % (format_id, i)
+
+ for i, format in enumerate(formats):
if format.get('format') is None:
format['format'] = '{id} - {res}{note}'.format(
id=format['format_id'],
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url']).lower()
-
- format_limit = self.params.get('format_limit', None)
- if format_limit:
- formats = list(takewhile_inclusive(
- lambda f: f['format_id'] != format_limit, formats
- ))
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
# TODO Central sorting goes here
# element in the 'formats' field in info_dict is info_dict itself,
# which can't be exported to json
info_dict['formats'] = formats
- if self.params.get('listformats', None):
+ if self.params.get('listformats'):
self.list_formats(info_dict)
return
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
req_format = self.params.get('format')
if req_format is None:
- req_format = 'best'
+ req_format_list = []
+ if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
+ info_dict['extractor'] in ['youtube', 'ted']):
+ merger = FFmpegMergerPP(self)
+ if merger.available and merger.can_merge():
+ req_format_list.append('bestvideo+bestaudio')
+ req_format_list.append('best')
+ req_format = '/'.join(req_format_list)
formats_to_download = []
- # The -1 is for supporting YoutubeIE
- if req_format in ('-1', 'all'):
+ if req_format == 'all':
formats_to_download = formats
else:
for rfstr in req_format.split(','):
else self.params['merge_output_format'])
selected_format = {
'requested_formats': formats_info,
- 'format': rf,
- 'ext': formats_info[0]['ext'],
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
+ 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+ formats_info[1].get('format_id')),
'width': formats_info[0].get('width'),
'height': formats_info[0].get('height'),
'resolution': formats_info[0].get('resolution'),
info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Pick the subtitle languages and, per language, the format to write.

    normal_subtitles and automatic_captions map a language code to a list
    of format dicts (each with at least an 'ext' key). Normal subtitles
    are considered when 'writesubtitles' is set, automatic captions when
    'writeautomaticsub' is set; normal subtitles win for a language
    present in both.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when neither option is enabled or nothing is available.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and want_normal:
        candidates.update(normal_subtitles)
    if automatic_captions and want_automatic:
        for lang, cap_info in automatic_captions.items():
            # Never override a normal subtitle track with a caption
            candidates.setdefault(lang, cap_info)

    if not candidates or not (want_normal or want_automatic):
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = candidates.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates:
        requested_langs = ['en']
    else:
        # No explicit request and no English: take the first language
        requested_langs = [next(iter(candidates))]

    formats_query = self.params.get('subtitlesformat', 'best')
    if formats_query:
        formats_preference = formats_query.split('/')
    else:
        formats_preference = []

    selected = {}
    for lang in requested_langs:
        lang_formats = candidates.get(lang)
        if lang_formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = lang_formats[-1]
                break
            same_ext = [fmt for fmt in lang_formats if fmt['ext'] == ext]
            if same_ext:
                chosen = same_ext[-1]
                break
        else:
            # No preferred format matched: fall back to the last one
            chosen = lang_formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        selected[lang] = chosen
    return selected
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
- # Keep for backwards compatibility
- info_dict['stitle'] = info_dict['title']
-
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict)
+ reason = self._match_entry(info_dict, incomplete=False)
if reason is not None:
self.to_screen('[download] ' + reason)
return
self._num_downloads += 1
- filename = self.prepare_filename(info_dict)
+ info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings
if self.params.get('forcetitle', False):
if self.params.get('forceformat', False):
self.to_stdout(info_dict['format'])
if self.params.get('forcejson', False):
- info_dict['_filename'] = filename
self.to_stdout(json.dumps(info_dict))
- if self.params.get('dump_single_json', False):
- info_dict['_filename'] = filename
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
return
try:
- dn = os.path.dirname(encodeFilename(filename))
+ dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
return
if self.params.get('writedescription', False):
- descfn = filename + '.description'
+ descfn = replace_extension(filename, 'description', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
return
if self.params.get('writeannotations', False):
- annofn = filename + '.annotations.xml'
+ annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
else:
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
- if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat', 'srt')
- for sub_lang in subtitles.keys():
- sub = subtitles[sub_lang]
- if sub is None:
- continue
+ subtitles = info_dict['requested_subtitles']
+ ie = self.get_info_extractor(info_dict['extractor_key'])
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ if sub_info.get('data') is not None:
+ sub_data = sub_info['data']
+ else:
+ try:
+ sub_data = ie._download_webpage(
+ sub_info['url'], info_dict['id'], note=False)
+ except ExtractorError as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, compat_str(err.cause)))
+ continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
if self.params.get('writeinfojson', False):
- infofn = os.path.splitext(filename)[0] + '.info.json'
+ infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen('[info] Video description metadata is already present')
else:
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
try:
- write_json_file(info_dict, infofn)
+ write_json_file(self.filter_requested_info(info_dict), infofn)
except (OSError, IOError):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
- if self.params.get('writethumbnail', False):
- if info_dict.get('thumbnail') is not None:
- thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
- thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
- self.to_screen('[%s] %s: Thumbnail is already present' %
- (info_dict['extractor'], info_dict['id']))
- else:
- self.to_screen('[%s] %s: Downloading thumbnail ...' %
- (info_dict['extractor'], info_dict['id']))
- try:
- uf = self.urlopen(info_dict['thumbnail'])
- with open(thumb_filename, 'wb') as thumbf:
- shutil.copyfileobj(uf, thumbf)
- self.to_screen('[%s] %s: Writing thumbnail to: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_warning('Unable to download thumbnail "%s": %s' %
- (info_dict['thumbnail'], compat_str(err)))
+ self._write_thumbnails(info_dict, filename)
if not self.params.get('skip_download', False):
try:
def dl(name, info):
- fd = get_suitable_downloader(info)(self, self.params)
+ fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
+
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
- merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
- if not merger._executable:
+ merger = FFmpegMergerPP(self)
+ if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged')
+ ' The formats won\'t be merged.')
else:
postprocessors = [merger]
- for f in info_dict['requested_formats']:
- new_info = dict(info_dict)
- new_info.update(f)
- fname = self.prepare_filename(new_info)
- fname = prepend_extension(fname, 'f%s' % f['format_id'])
- downloaded.append(fname)
- partial_success = dl(fname, new_info)
- success = success and partial_success
- info_dict['__postprocessors'] = postprocessors
- info_dict['__files_to_merge'] = downloaded
+
def compatible_formats(formats):
    """Tell whether a (video, audio) format pair shares a container family.

    formats is a two-item sequence: the video format dict followed by the
    audio format dict. Returns True only when both declare a non-empty
    'ext' and both extensions belong to the same group of
    COMPATIBLE_EXTS; returns False otherwise.
    """
    video, audio = formats
    # Check extension
    video_ext, audio_ext = video.get('ext'), audio.get('ext')
    if not (video_ext and audio_ext):
        return False
    # Every group must be a real tuple. ('webm') without the trailing
    # comma is just the string 'webm', and `in` would then do substring
    # matching (so 'web' or 'm' would be treated as compatible).
    COMPATIBLE_EXTS = (
        ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
        ('webm',),
    )
    # TODO: Check acodec/vcodec
    return any(
        video_ext in exts and audio_ext in exts
        for exts in COMPATIBLE_EXTS)
+
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == info_dict['ext']
+ else filename)
+ requested_formats = info_dict['requested_formats']
+ if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
+ # Ensure filename always has a correct extension for successful merge
+ filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ if os.path.exists(encodeFilename(filename)):
+ self.to_screen(
+ '[download] %s has already been downloaded and '
+ 'merged' % filename)
+ else:
+ for f in requested_formats:
+ new_info = dict(info_dict)
+ new_info.update(f)
+ fname = self.prepare_filename(new_info)
+ fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+ downloaded.append(fname)
+ partial_success = dl(fname, new_info)
+ success = success and partial_success
+ info_dict['__postprocessors'] = postprocessors
+ info_dict['__files_to_merge'] = downloaded
else:
# Just a single file
success = dl(filename, info_dict)
if success:
# Fixup content
+ fixup_policy = self.params.get('fixup')
+ if fixup_policy is None:
+ fixup_policy = 'detect_or_warn'
+
stretched_ratio = info_dict.get('stretched_ratio')
if stretched_ratio is not None and stretched_ratio != 1:
- fixup_policy = self.params.get('fixup')
- if fixup_policy is None:
- fixup_policy = 'detect_or_warn'
if fixup_policy == 'warn':
self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
info_dict['id'], stretched_ratio))
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
info_dict['id'], stretched_ratio))
else:
- assert fixup_policy == 'ignore'
+ assert fixup_policy in ('ignore', 'never')
+
+ if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+ if fixup_policy == 'warn':
+ self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
+ info_dict['id']))
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM4aPP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
+ info_dict['id']))
+ else:
+ assert fixup_policy in ('ignore', 'never')
try:
self.post_process(filename, info_dict)
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
- '%' not in outtmpl
- and self.params.get('max_downloads') != 1):
+ '%' not in outtmpl and
+ self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
try:
# It also downloads the videos
- res = self.extract_info(url)
+ res = self.extract_info(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
return self._download_retcode
def download_with_info_file(self, info_filename):
- with io.open(info_filename, 'r', encoding='utf-8') as f:
- info = json.load(f)
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, we can't call json.load
+ info = self.filter_requested_info(json.loads('\n'.join(f)))
try:
self.process_ie_result(info, download=True)
except DownloadError:
raise
return self._download_retcode
+ @staticmethod
+ def filter_requested_info(info_dict):
+ return dict(
+ (k, v) for k, v in info_dict.items()
+ if k not in ['requested_formats', 'requested_subtitles'])
+
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
pps_chain.extend(ie_info['__postprocessors'])
pps_chain.extend(self._pps)
for pp in pps_chain:
- keep_video = None
- old_filename = info['filepath']
+ files_to_delete = []
try:
- keep_video_wish, info = pp.run(info)
- if keep_video_wish is not None:
- if keep_video_wish:
- keep_video = keep_video_wish
- elif keep_video is None:
- # No clear decision yet, let IE decide
- keep_video = keep_video_wish
+ files_to_delete, info = pp.run(info)
except PostProcessingError as e:
self.report_error(e.msg)
- if keep_video is False and not self.params.get('keepvideo', False):
- try:
+ if files_to_delete and not self.params.get('keepvideo', False):
+ for old_filename in files_to_delete:
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded video file')
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
return res
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are taken from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format. Formats whose
    'preference' is below -1000 are left out of the table. When more than
    one format exists, the note of the last listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter above may have removed every row, so check the
    # table itself before indexing its last entry (avoids an IndexError).
    if len(formats) > 1 and table:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
+
def list_thumbnails(self, info_dict):
    """Print the thumbnails known for a video as a table.

    Rows come from info_dict['thumbnails']. When that is missing or empty,
    the single info_dict['thumbnail'] URL is listed instead under the id
    '0'. If neither is present, a notice is printed and nothing else is
    shown.
    """
    entries = info_dict.get('thumbnails')
    if not entries:
        fallback_url = info_dict.get('thumbnail')
        if not fallback_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        entries = [{'id': '0', 'url': fallback_url}]

    self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])

    rows = []
    for entry in entries:
        # Width and height are optional; absent ones show as 'unknown'.
        rows.append([
            entry['id'],
            entry.get('width', 'unknown'),
            entry.get('height', 'unknown'),
            entry['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
+
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the available subtitle languages and formats for a video.

    subtitles is iterated as a mapping from language to a list of format
    dicts; the 'ext' of each format is listed, in reverse list order.
    name is the label used in the printed messages. When subtitles is
    empty, only a "has no ..." notice is printed.
    """
    if subtitles:
        self.to_screen('Available %s for %s:' % (name, video_id))
        rows = []
        for lang, formats in subtitles.items():
            extensions = [f['ext'] for f in reversed(formats)]
            rows.append([lang, ', '.join(extensions)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
def urlopen(self, req):
""" Start an HTTP download """
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
# To work around aforementioned issue we will replace request's original URL with
# percent-encoded one
- req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+ req_is_string = isinstance(req, compat_basestring)
url = req if req_is_string else req.get_full_url()
url_escaped = escape_url(url)
if req_is_string:
req = url_escaped
else:
- req = compat_urllib_request.Request(
+ req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+ req = req_type(
url_escaped, data=req.data, headers=req.headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')
- except:
+ except Exception:
try:
sys.exc_clear()
- except:
+ except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
- exe_versions = FFmpegPostProcessor.get_versions()
+ exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_str = ', '.join(
'%s %s' % (exe, v)
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+ proxy_handler = PerRequestProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
opener = compat_urllib_request.build_opener(
- https_handler, proxy_handler, cookie_processor, ydlh)
+ proxy_handler, https_handler, cookie_processor, ydlh)
+
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
if encoding is None:
encoding = preferredencoding()
return encoding
+
def _write_thumbnails(self, info_dict, filename):
    """Download the thumbnail image(s) of a video next to its media file.

    With the 'writethumbnail' param set, only the last entry of
    info_dict['thumbnails'] is fetched; otherwise, with
    'write_all_thumbnails' set, every entry is fetched. When neither param
    is set, or no thumbnails are present, nothing is done.

    The target name is filename with its extension replaced by the
    thumbnail's extension ('jpg' is passed to determine_ext as the
    default); when several thumbnails are written, '_<id>' is inserted
    before the extension. Each processed thumbnail dict gets its
    'filename' key set. Network errors are reported as warnings rather
    than raised.
    """
    if self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            thumbnails = [thumbnails[-1]]
    elif self.params.get('write_all_thumbnails', False):
        thumbnails = info_dict.get('thumbnails')
    else:
        return

    if not thumbnails:
        # No thumbnails present, so return immediately
        return

    for t in thumbnails:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
        thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
        t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                uf = self.urlopen(t['url'])
                try:
                    # Encode the name the same way the existence check
                    # above does, so both refer to the same path.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Release the HTTP response even if the copy fails.
                    uf.close()
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], compat_str(err)))