Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_get_terminal_size,
  33     compat_http_client,
  34     compat_kwargs,
  35     compat_numeric_types,
  36     compat_os_name,
  37     compat_str,
  38     compat_tokenize_tokenize,
  39     compat_urllib_error,
  40     compat_urllib_request,
  41     compat_urllib_request_DataHandler,
  42 )
  43 from .utils import (
  44     age_restricted,
  45     args_to_str,
  46     ContentTooShortError,
  47     date_from_str,
  48     DateRange,
  49     DEFAULT_OUTTMPL,
  50     determine_ext,
  51     determine_protocol,
  52     DownloadError,
  53     encode_compat_str,
  54     encodeFilename,
  55     error_to_compat_str,
  56     expand_path,
  57     ExtractorError,
  58     format_bytes,
  59     formatSeconds,
  60     GeoRestrictedError,
  61     ISO3166Utils,
  62     locked_file,
  63     make_HTTPS_handler,
  64     MaxDownloadsReached,
  65     PagedList,
  66     parse_filesize,
  67     PerRequestProxyHandler,
  68     platform_name,
  69     PostProcessingError,
  70     preferredencoding,
  71     prepend_extension,
  72     register_socks_protocols,
  73     render_table,
  74     replace_extension,
  75     SameFileError,
  76     sanitize_filename,
  77     sanitize_path,
  78     sanitize_url,
  79     sanitized_Request,
  80     std_headers,
  81     subtitles_filename,
  82     UnavailableVideoError,
  83     url_basename,
  84     version_tuple,
  85     write_json_file,
  86     write_string,
  87     YoutubeDLCookieProcessor,
  88     YoutubeDLHandler,
  89 )
  90 from .cache import Cache
  91 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  92 from .downloader import get_suitable_downloader
  93 from .downloader.rtmp import rtmpdump_version
  94 from .postprocessor import (
  95     FFmpegFixupM3u8PP,
  96     FFmpegFixupM4aPP,
  97     FFmpegFixupStretchedPP,
  98     FFmpegMergerPP,
  99     FFmpegPostProcessor,
 100     get_postprocessor,
 101 )
 102 from .version import __version__
 103
 104 if compat_os_name == 'nt':
 105     import ctypes
 106
 107
 108 class YoutubeDL(object):
 109     """YoutubeDL class.
 110
  111     YoutubeDL objects are the ones responsible for downloading the
 112     actual video file and writing it to disk if the user has requested
 113     it, among some other tasks. In most cases there should be one per
 114     program. As, given a video URL, the downloader doesn't know how to
 115     extract all the needed information, task that InfoExtractors do, it
 116     has to pass the URL to one of them.
 117
 118     For this, YoutubeDL objects have a method that allows
 119     InfoExtractors to be registered in a given order. When it is passed
  120     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 121     finds that reports being able to handle it. The InfoExtractor extracts
 122     all the information about the video or videos the URL refers to, and
  123     YoutubeDL processes the extracted information, possibly using a File
 124     Downloader to download the video.
 125
 126     YoutubeDL objects accept a lot of parameters. In order not to saturate
 127     the object constructor with arguments, it receives a dictionary of
 128     options instead. These options are available through the params
 129     attribute for the InfoExtractors to use. The YoutubeDL also
 130     registers itself as the downloader in charge for the InfoExtractors
 131     that are added to it, so this is a "mutual registration".
 132
 133     Available options:
 134
 135     username:          Username for authentication purposes.
 136     password:          Password for authentication purposes.
 137     videopassword:     Password for accessing a video.
 138     ap_mso:            Adobe Pass multiple-system operator identifier.
 139     ap_username:       Multiple-system operator account username.
 140     ap_password:       Multiple-system operator account password.
 141     usenetrc:          Use netrc for authentication instead.
 142     verbose:           Print additional info to stdout.
 143     quiet:             Do not print messages to stdout.
 144     no_warnings:       Do not print out anything for warnings.
 145     forceurl:          Force printing final URL.
 146     forcetitle:        Force printing title.
 147     forceid:           Force printing ID.
 148     forcethumbnail:    Force printing thumbnail URL.
 149     forcedescription:  Force printing description.
 150     forcefilename:     Force printing final filename.
 151     forceduration:     Force printing duration.
 152     forcejson:         Force printing info_dict as JSON.
 153     dump_single_json:  Force printing the info_dict of the whole playlist
 154                        (or video) as a single JSON line.
 155     simulate:          Do not download the video files.
 156     format:            Video format code. See options.py for more information.
 157     outtmpl:           Template for output names.
 158     restrictfilenames: Do not allow "&" and spaces in file names
 159     ignoreerrors:      Do not stop on download errors.
 160     force_generic_extractor: Force downloader to use the generic extractor
 161     nooverwrites:      Prevent overwriting files.
 162     playliststart:     Playlist item to start at.
 163     playlistend:       Playlist item to end at.
 164     playlist_items:    Specific indices of playlist to download.
 165     playlistreverse:   Download playlist items in reverse order.
 166     playlistrandom:    Download playlist items in random order.
 167     matchtitle:        Download only matching titles.
 168     rejecttitle:       Reject downloads for matching titles.
 169     logger:            Log messages to a logging.Logger instance.
 170     logtostderr:       Log messages to stderr instead of stdout.
 171     writedescription:  Write the video description to a .description file
 172     writeinfojson:     Write the video description to a .info.json file
 173     writeannotations:  Write the video annotations to a .annotations.xml file
 174     writethumbnail:    Write the thumbnail image to a file
 175     write_all_thumbnails:  Write all thumbnail formats to files
 176     writesubtitles:    Write the video subtitles to a file
 177     writeautomaticsub: Write the automatically generated subtitles to a file
 178     allsubtitles:      Downloads all the subtitles of the video
 179                        (requires writesubtitles or writeautomaticsub)
 180     listsubtitles:     Lists all available subtitles for the video
 181     subtitlesformat:   The format code for subtitles
 182     subtitleslangs:    List of languages of the subtitles to download
 183     keepvideo:         Keep the video file after post-processing
 184     daterange:         A DateRange object, download only if the upload_date is in the range.
 185     skip_download:     Skip the actual download of the video file
 186     cachedir:          Location of the cache files in the filesystem.
 187                        False to disable filesystem cache.
 188     noplaylist:        Download single video instead of a playlist if in doubt.
 189     age_limit:         An integer representing the user's age in years.
 190                        Unsuitable videos for the given age are skipped.
 191     min_views:         An integer representing the minimum view count the video
 192                        must have in order to not be skipped.
 193                        Videos without view count information are always
 194                        downloaded. None for no limit.
 195     max_views:         An integer representing the maximum view count.
 196                        Videos that are more popular than that are not
 197                        downloaded.
 198                        Videos without view count information are always
 199                        downloaded. None for no limit.
 200     download_archive:  File name of a file where all downloads are recorded.
 201                        Videos already present in the file are not downloaded
 202                        again.
 203     cookiefile:        File name where cookies should be read from and dumped to.
 204     nocheckcertificate:Do not verify SSL certificates
 205     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 206                        At the moment, this is only supported by YouTube.
 207     proxy:             URL of the proxy server to use
 208     geo_verification_proxy:  URL of the proxy to use for IP address verification
 209                        on geo-restricted sites. (Experimental)
 210     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 211     bidi_workaround:   Work around buggy terminals without bidirectional text
  212                        support, using fribidi
 213     debug_printtraffic:Print out sent and received HTTP traffic
 214     include_ads:       Download ads as well
 215     default_search:    Prepend this string if an input url is not valid.
 216                        'auto' for elaborate guessing
 217     encoding:          Use this encoding instead of the system-specified.
 218     extract_flat:      Do not resolve URLs, return the immediate result.
 219                        Pass in 'in_playlist' to only show this behavior for
 220                        playlist items.
 221     postprocessors:    A list of dictionaries, each with an entry
 222                        * key:  The name of the postprocessor. See
 223                                youtube_dl/postprocessor/__init__.py for a list.
 224                        as well as any further keyword arguments for the
 225                        postprocessor.
 226     progress_hooks:    A list of functions that get called on download
 227                        progress, with a dictionary with the entries
 228                        * status: One of "downloading", "error", or "finished".
 229                                  Check this first and ignore unknown values.
 230
 231                        If status is one of "downloading", or "finished", the
 232                        following properties may also be present:
 233                        * filename: The final filename (always present)
 234                        * tmpfilename: The filename we're currently writing to
 235                        * downloaded_bytes: Bytes on disk
 236                        * total_bytes: Size of the whole file, None if unknown
 237                        * total_bytes_estimate: Guess of the eventual file size,
 238                                                None if unavailable.
 239                        * elapsed: The number of seconds since download started.
 240                        * eta: The estimated time in seconds, None if unknown
 241                        * speed: The download speed in bytes/second, None if
 242                                 unknown
 243                        * fragment_index: The counter of the currently
 244                                          downloaded video fragment.
 245                        * fragment_count: The number of fragments (= individual
 246                                          files that will be merged)
 247
 248                        Progress hooks are guaranteed to be called at least once
 249                        (with status "finished") if the download is successful.
 250     merge_output_format: Extension to use when merging formats.
 251     fixup:             Automatically correct known faults of the file.
 252                        One of:
 253                        - "never": do nothing
 254                        - "warn": only emit a warning
 255                        - "detect_or_warn": check whether we can do anything
 256                                            about it, warn otherwise (default)
 257     source_address:    (Experimental) Client-side IP address to bind to.
 258     call_home:         Boolean, true iff we are allowed to contact the
 259                        youtube-dl servers for debugging.
 260     sleep_interval:    Number of seconds to sleep before each download when
 261                        used alone or a lower bound of a range for randomized
 262                        sleep before each download (minimum possible number
 263                        of seconds to sleep) when used along with
 264                        max_sleep_interval.
 265     max_sleep_interval:Upper bound of a range for randomized sleep before each
 266                        download (maximum possible number of seconds to sleep).
 267                        Must only be used along with sleep_interval.
 268                        Actual sleep time will be a random float from range
 269                        [sleep_interval; max_sleep_interval].
 270     listformats:       Print an overview of available video formats and exit.
 271     list_thumbnails:   Print a table of all thumbnails and exit.
 272     match_filter:      A function that gets called with the info_dict of
 273                        every video.
 274                        If it returns a message, the video is ignored.
 275                        If it returns None, the video is downloaded.
 276                        match_filter_func in utils.py is one example for this.
 277     no_color:          Do not emit color codes in output.
 278     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 279                        HTTP header (experimental)
 280     geo_bypass_country:
 281                        Two-letter ISO 3166-2 country code that will be used for
 282                        explicit geographic restriction bypassing via faking
 283                        X-Forwarded-For HTTP header (experimental)
 284
 285     The following options determine which downloader is picked:
 286     external_downloader: Executable of the external downloader to call.
 287                        None or unset for standard (built-in) downloader.
 288     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 289                        if True, otherwise use ffmpeg/avconv if False, otherwise
 290                        use downloader suggested by extractor if None.
 291
 292     The following parameters are not used by YoutubeDL itself, they are used by
 293     the downloader (see youtube_dl/downloader/common.py):
 294     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 295     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 296     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
 297
 298     The following options are used by the post processors:
 299     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 300                        otherwise prefer avconv.
 301     postprocessor_args: A list of additional command-line arguments for the
 302                         postprocessor.
 303     """
 304
 305     params = None
 306     _ies = []
 307     _pps = []
 308     _download_retcode = None
 309     _num_downloads = None
 310     _screen_file = None
 311
 312     def __init__(self, params=None, auto_init=True):
 313         """Create a FileDownloader object with the given options."""
 314         if params is None:
 315             params = {}
 316         self._ies = []
 317         self._ies_instances = {}
 318         self._pps = []
 319         self._progress_hooks = []
 320         self._download_retcode = 0
 321         self._num_downloads = 0
 322         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 323         self._err_file = sys.stderr
 324         self.params = {
 325             # Default parameters
 326             'nocheckcertificate': False,
 327         }
 328         self.params.update(params)
 329         self.cache = Cache(self)
 330
 331         def check_deprecated(param, option, suggestion):
 332             if self.params.get(param) is not None:
 333                 self.report_warning(
 334                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 335                 return True
 336             return False
 337
 338         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 339             if self.params.get('geo_verification_proxy') is None:
 340                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 341
 342         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 343         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 344         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 345
 346         if params.get('bidi_workaround', False):
 347             try:
 348                 import pty
 349                 master, slave = pty.openpty()
 350                 width = compat_get_terminal_size().columns
 351                 if width is None:
 352                     width_args = []
 353                 else:
 354                     width_args = ['-w', str(width)]
 355                 sp_kwargs = dict(
 356                     stdin=subprocess.PIPE,
 357                     stdout=slave,
 358                     stderr=self._err_file)
 359                 try:
 360                     self._output_process = subprocess.Popen(
 361                         ['bidiv'] + width_args, **sp_kwargs
 362                     )
 363                 except OSError:
 364                     self._output_process = subprocess.Popen(
 365                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 366                 self._output_channel = os.fdopen(master, 'rb')
 367             except OSError as ose:
 368                 if ose.errno == errno.ENOENT:
 369                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 370                 else:
 371                     raise
 372
 373         if (sys.platform != 'win32' and
 374                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 375                 not params.get('restrictfilenames', False)):
 376             # Unicode filesystem API will throw errors (#1474, #13027)
 377             self.report_warning(
 378                 'Assuming --restrict-filenames since file system encoding '
 379                 'cannot encode all characters. '
 380                 'Set the LC_ALL environment variable to fix this.')
 381             self.params['restrictfilenames'] = True
 382
 383         if isinstance(params.get('outtmpl'), bytes):
 384             self.report_warning(
 385                 'Parameter outtmpl is bytes, but should be a unicode string. '
 386                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 387
 388         self._setup_opener()
 389
 390         if auto_init:
 391             self.print_debug_header()
 392             self.add_default_info_extractors()
 393
 394         for pp_def_raw in self.params.get('postprocessors', []):
 395             pp_class = get_postprocessor(pp_def_raw['key'])
 396             pp_def = dict(pp_def_raw)
 397             del pp_def['key']
 398             pp = pp_class(self, **compat_kwargs(pp_def))
 399             self.add_post_processor(pp)
 400
 401         for ph in self.params.get('progress_hooks', []):
 402             self.add_progress_hook(ph)
 403
 404         register_socks_protocols()
 405
 406     def warn_if_short_id(self, argv):
 407         # short YouTube ID starting with dash?
 408         idxs = [
 409             i for i, a in enumerate(argv)
 410             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 411         if idxs:
 412             correct_argv = (
 413                 ['youtube-dl'] +
 414                 [a for i, a in enumerate(argv) if i not in idxs] +
 415                 ['--'] + [argv[i] for i in idxs]
 416             )
 417             self.report_warning(
 418                 'Long argument string detected. '
 419                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 420                 args_to_str(correct_argv))
 421
 422     def add_info_extractor(self, ie):
 423         """Add an InfoExtractor object to the end of the list."""
 424         self._ies.append(ie)
 425         if not isinstance(ie, type):
 426             self._ies_instances[ie.ie_key()] = ie
 427             ie.set_downloader(self)
 428
 429     def get_info_extractor(self, ie_key):
 430         """
 431         Get an instance of an IE with name ie_key, it will try to get one from
 432         the _ies list, if there's no instance it will create a new one and add
 433         it to the extractor list.
 434         """
 435         ie = self._ies_instances.get(ie_key)
 436         if ie is None:
 437             ie = get_info_extractor(ie_key)()
 438             self.add_info_extractor(ie)
 439         return ie
 440
 441     def add_default_info_extractors(self):
 442         """
 443         Add the InfoExtractors returned by gen_extractors to the end of the list
 444         """
 445         for ie in gen_extractor_classes():
 446             self.add_info_extractor(ie)
 447
 448     def add_post_processor(self, pp):
 449         """Add a PostProcessor object to the end of the chain."""
 450         self._pps.append(pp)
 451         pp.set_downloader(self)
 452
 453     def add_progress_hook(self, ph):
 454         """Add the progress hook (currently only for the file downloader)"""
 455         self._progress_hooks.append(ph)
 456
 457     def _bidi_workaround(self, message):
 458         if not hasattr(self, '_output_channel'):
 459             return message
 460
 461         assert hasattr(self, '_output_process')
 462         assert isinstance(message, compat_str)
 463         line_count = message.count('\n') + 1
 464         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 465         self._output_process.stdin.flush()
 466         res = ''.join(self._output_channel.readline().decode('utf-8')
 467                       for _ in range(line_count))
 468         return res[:-len('\n')]
 469
 470     def to_screen(self, message, skip_eol=False):
 471         """Print message to stdout if not in quiet mode."""
 472         return self.to_stdout(message, skip_eol, check_quiet=True)
 473
 474     def _write_string(self, s, out=None):
 475         write_string(s, out=out, encoding=self.params.get('encoding'))
 476
 477     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 478         """Print message to stdout if not in quiet mode."""
 479         if self.params.get('logger'):
 480             self.params['logger'].debug(message)
 481         elif not check_quiet or not self.params.get('quiet', False):
 482             message = self._bidi_workaround(message)
 483             terminator = ['\n', ''][skip_eol]
 484             output = message + terminator
 485
 486             self._write_string(output, self._screen_file)
 487
 488     def to_stderr(self, message):
 489         """Print message to stderr."""
 490         assert isinstance(message, compat_str)
 491         if self.params.get('logger'):
 492             self.params['logger'].error(message)
 493         else:
 494             message = self._bidi_workaround(message)
 495             output = message + '\n'
 496             self._write_string(output, self._err_file)
 497
 498     def to_console_title(self, message):
 499         if not self.params.get('consoletitle', False):
 500             return
 501         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 502             # c_wchar_p() might not be necessary if `message` is
 503             # already of type unicode()
 504             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 505         elif 'TERM' in os.environ:
 506             self._write_string('\033]0;%s\007' % message, self._screen_file)
 507
 508     def save_console_title(self):
 509         if not self.params.get('consoletitle', False):
 510             return
 511         if 'TERM' in os.environ:
 512             # Save the title on stack
 513             self._write_string('\033[22;0t', self._screen_file)
 514
 515     def restore_console_title(self):
 516         if not self.params.get('consoletitle', False):
 517             return
 518         if 'TERM' in os.environ:
 519             # Restore the title from stack
 520             self._write_string('\033[23;0t', self._screen_file)
 521
 522     def __enter__(self):
 523         self.save_console_title()
 524         return self
 525
 526     def __exit__(self, *args):
 527         self.restore_console_title()
 528
 529         if self.params.get('cookiefile') is not None:
 530             self.cookiejar.save()
 531
 532     def trouble(self, message=None, tb=None):
 533         """Determine action to take when a download problem appears.
 534
 535         Depending on if the downloader has been configured to ignore
 536         download errors or not, this method may throw an exception or
 537         not when errors are found, after printing the message.
 538
 539         tb, if given, is additional traceback information.
 540         """
 541         if message is not None:
 542             self.to_stderr(message)
 543         if self.params.get('verbose'):
 544             if tb is None:
 545                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 546                     tb = ''
 547                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 548                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 549                     tb += encode_compat_str(traceback.format_exc())
 550                 else:
 551                     tb_data = traceback.format_list(traceback.extract_stack())
 552                     tb = ''.join(tb_data)
 553             self.to_stderr(tb)
 554         if not self.params.get('ignoreerrors', False):
 555             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 556                 exc_info = sys.exc_info()[1].exc_info
 557             else:
 558                 exc_info = sys.exc_info()
 559             raise DownloadError(message, exc_info)
 560         self._download_retcode = 1
 561
 562     def report_warning(self, message):
 563         '''
 564         Print the message to stderr, it will be prefixed with 'WARNING:'
 565         If stderr is a tty file the 'WARNING:' will be colored
 566         '''
 567         if self.params.get('logger') is not None:
 568             self.params['logger'].warning(message)
 569         else:
 570             if self.params.get('no_warnings'):
 571                 return
 572             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 573                 _msg_header = '\033[0;33mWARNING:\033[0m'
 574             else:
 575                 _msg_header = 'WARNING:'
 576             warning_message = '%s %s' % (_msg_header, message)
 577             self.to_stderr(warning_message)
 578
 579     def report_error(self, message, tb=None):
 580         '''
 581         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 582         in red if stderr is a tty file.
 583         '''
 584         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 585             _msg_header = '\033[0;31mERROR:\033[0m'
 586         else:
 587             _msg_header = 'ERROR:'
 588         error_message = '%s %s' % (_msg_header, message)
 589         self.trouble(error_message, tb)
 590
 591     def report_file_already_downloaded(self, file_name):
 592         """Report file has already been fully downloaded."""
 593         try:
 594             self.to_screen('[download] %s has already been downloaded' % file_name)
 595         except UnicodeEncodeError:
 596             self.to_screen('[download] The file has already been downloaded')
 597
 598     def prepare_filename(self, info_dict):
 599         """Generate the output filename."""
 600         try:
 601             template_dict = dict(info_dict)
 602
 603             template_dict['epoch'] = int(time.time())
 604             autonumber_size = self.params.get('autonumber_size')
 605             if autonumber_size is None:
 606                 autonumber_size = 5
 607             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 608             if template_dict.get('resolution') is None:
 609                 if template_dict.get('width') and template_dict.get('height'):
 610                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 611                 elif template_dict.get('height'):
 612                     template_dict['resolution'] = '%sp' % template_dict['height']
 613                 elif template_dict.get('width'):
 614                     template_dict['resolution'] = '%dx?' % template_dict['width']
 615
 616             sanitize = lambda k, v: sanitize_filename(
 617                 compat_str(v),
 618                 restricted=self.params.get('restrictfilenames'),
 619                 is_id=(k == 'id' or k.endswith('_id')))
 620             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 621                                  for k, v in template_dict.items()
 622                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 623             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 624
 625             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 626
 627             # For fields playlist_index and autonumber convert all occurrences
 628             # of %(field)s to %(field)0Nd for backward compatibility
 629             field_size_compat_map = {
 630                 'playlist_index': len(str(template_dict['n_entries'])),
 631                 'autonumber': autonumber_size,
 632             }
 633             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 634             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 635             if mobj:
 636                 outtmpl = re.sub(
 637                     FIELD_SIZE_COMPAT_RE,
 638                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 639                     outtmpl)
 640
 641             NUMERIC_FIELDS = set((
 642                 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 643                 'timestamp', 'upload_year', 'upload_month', 'upload_day',
 644                 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 645                 'average_rating', 'comment_count', 'age_limit',
 646                 'start_time', 'end_time',
 647                 'chapter_number', 'season_number', 'episode_number',
 648                 'track_number', 'disc_number', 'release_year',
 649                 'playlist_index',
 650             ))
 651
 652             # Missing numeric fields used together with integer presentation types
 653             # in format specification will break the argument substitution since
 654             # string 'NA' is returned for missing fields. We will patch output
 655             # template for missing fields to meet string presentation type.
 656             for numeric_field in NUMERIC_FIELDS:
 657                 if numeric_field not in template_dict:
 658                     # As of [1] format syntax is:
 659                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 660                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 661                     FORMAT_RE = r'''(?x)
 662                         (?<!%)
 663                         %
 664                         \({0}\)  # mapping key
 665                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 666                         (?:\d+)?  # minimum field width (optional)
 667                         (?:\.\d+)?  # precision (optional)
 668                         [hlL]?  # length modifier (optional)
 669                         [diouxXeEfFgGcrs%]  # conversion type
 670                     '''
 671                     outtmpl = re.sub(
 672                         FORMAT_RE.format(numeric_field),
 673                         r'%({0})s'.format(numeric_field), outtmpl)
 674
 675             filename = expand_path(outtmpl % template_dict)
 676             # Temporary fix for #4787
 677             # 'Treat' all problem characters by passing filename through preferredencoding
 678             # to workaround encoding issues with subprocess on python2 @ Windows
 679             if sys.version_info < (3, 0) and sys.platform == 'win32':
 680                 filename = encodeFilename(filename, True).decode(preferredencoding())
 681             return sanitize_path(filename)
 682         except ValueError as err:
 683             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 684             return None
 685
 686     def _match_entry(self, info_dict, incomplete):
 687         """ Returns None iff the file should be downloaded """
 688
 689         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 690         if 'title' in info_dict:
 691             # This can happen when we're just evaluating the playlist
 692             title = info_dict['title']
 693             matchtitle = self.params.get('matchtitle', False)
 694             if matchtitle:
 695                 if not re.search(matchtitle, title, re.IGNORECASE):
 696                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 697             rejecttitle = self.params.get('rejecttitle', False)
 698             if rejecttitle:
 699                 if re.search(rejecttitle, title, re.IGNORECASE):
 700                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 701         date = info_dict.get('upload_date')
 702         if date is not None:
 703             dateRange = self.params.get('daterange', DateRange())
 704             if date not in dateRange:
 705                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 706         view_count = info_dict.get('view_count')
 707         if view_count is not None:
 708             min_views = self.params.get('min_views')
 709             if min_views is not None and view_count < min_views:
 710                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 711             max_views = self.params.get('max_views')
 712             if max_views is not None and view_count > max_views:
 713                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 714         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 715             return 'Skipping "%s" because it is age restricted' % video_title
 716         if self.in_download_archive(info_dict):
 717             return '%s has already been recorded in archive' % video_title
 718
 719         if not incomplete:
 720             match_filter = self.params.get('match_filter')
 721             if match_filter is not None:
 722                 ret = match_filter(info_dict)
 723                 if ret is not None:
 724                     return ret
 725
 726         return None
 727
 728     @staticmethod
 729     def add_extra_info(info_dict, extra_info):
 730         '''Set the keys from extra_info in info dict if they are missing'''
 731         for key, value in extra_info.items():
 732             info_dict.setdefault(key, value)
 733
 734     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 735                      process=True, force_generic_extractor=False):
 736         '''
 737         Returns a list with a dictionary for each video we find.
 738         If 'download', also downloads the videos.
 739         extra_info is a dict containing the extra values to add to each result
 740         '''
 741
 742         if not ie_key and force_generic_extractor:
 743             ie_key = 'Generic'
 744
 745         if ie_key:
 746             ies = [self.get_info_extractor(ie_key)]
 747         else:
 748             ies = self._ies
 749
 750         for ie in ies:
 751             if not ie.suitable(url):
 752                 continue
 753
 754             ie = self.get_info_extractor(ie.ie_key())
 755             if not ie.working():
 756                 self.report_warning('The program functionality for this site has been marked as broken, '
 757                                     'and will probably not work.')
 758
 759             try:
 760                 ie_result = ie.extract(url)
 761                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 762                     break
 763                 if isinstance(ie_result, list):
 764                     # Backwards compatibility: old IE result format
 765                     ie_result = {
 766                         '_type': 'compat_list',
 767                         'entries': ie_result,
 768                     }
 769                 self.add_default_extra_info(ie_result, ie, url)
 770                 if process:
 771                     return self.process_ie_result(ie_result, download, extra_info)
 772                 else:
 773                     return ie_result
 774             except GeoRestrictedError as e:
 775                 msg = e.msg
 776                 if e.countries:
 777                     msg += '\nThis video is available in %s.' % ', '.join(
 778                         map(ISO3166Utils.short2full, e.countries))
 779                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 780                 self.report_error(msg)
 781                 break
 782             except ExtractorError as e:  # An error we somewhat expected
 783                 self.report_error(compat_str(e), e.format_traceback())
 784                 break
 785             except MaxDownloadsReached:
 786                 raise
 787             except Exception as e:
 788                 if self.params.get('ignoreerrors', False):
 789                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 790                     break
 791                 else:
 792                     raise
 793         else:
 794             self.report_error('no suitable InfoExtractor for URL %s' % url)
 795
 796     def add_default_extra_info(self, ie_result, ie, url):
 797         self.add_extra_info(ie_result, {
 798             'extractor': ie.IE_NAME,
 799             'webpage_url': url,
 800             'webpage_url_basename': url_basename(url),
 801             'extractor_key': ie.ie_key(),
 802         })
 803
 804     def process_ie_result(self, ie_result, download=True, extra_info={}):
 805         """
 806         Take the result of the ie(may be modified) and resolve all unresolved
 807         references (URLs, playlist items).
 808
 809         It will also download the videos if 'download'.
 810         Returns the resolved ie_result.
 811         """
 812         result_type = ie_result.get('_type', 'video')
 813
 814         if result_type in ('url', 'url_transparent'):
 815             ie_result['url'] = sanitize_url(ie_result['url'])
 816             extract_flat = self.params.get('extract_flat', False)
 817             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 818                     extract_flat is True):
 819                 if self.params.get('forcejson', False):
 820                     self.to_stdout(json.dumps(ie_result))
 821                 return ie_result
 822
 823         if result_type == 'video':
 824             self.add_extra_info(ie_result, extra_info)
 825             return self.process_video_result(ie_result, download=download)
 826         elif result_type == 'url':
 827             # We have to add extra_info to the results because it may be
 828             # contained in a playlist
 829             return self.extract_info(ie_result['url'],
 830                                      download,
 831                                      ie_key=ie_result.get('ie_key'),
 832                                      extra_info=extra_info)
 833         elif result_type == 'url_transparent':
 834             # Use the information from the embedding page
 835             info = self.extract_info(
 836                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 837                 extra_info=extra_info, download=False, process=False)
 838
 839             # extract_info may return None when ignoreerrors is enabled and
 840             # extraction failed with an error, don't crash and return early
 841             # in this case
 842             if not info:
 843                 return info
 844
 845             force_properties = dict(
 846                 (k, v) for k, v in ie_result.items() if v is not None)
 847             for f in ('_type', 'url', 'ie_key'):
 848                 if f in force_properties:
 849                     del force_properties[f]
 850             new_result = info.copy()
 851             new_result.update(force_properties)
 852
 853             # Extracted info may not be a video result (i.e.
 854             # info.get('_type', 'video') != video) but rather an url or
 855             # url_transparent. In such cases outer metadata (from ie_result)
 856             # should be propagated to inner one (info). For this to happen
 857             # _type of info should be overridden with url_transparent. This
 858             # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
 859             if new_result.get('_type') == 'url':
 860                 new_result['_type'] = 'url_transparent'
 861
 862             return self.process_ie_result(
 863                 new_result, download=download, extra_info=extra_info)
 864         elif result_type in ('playlist', 'multi_video'):
 865             # We process each entry in the playlist
 866             playlist = ie_result.get('title') or ie_result.get('id')
 867             self.to_screen('[download] Downloading playlist: %s' % playlist)
 868
 869             playlist_results = []
 870
 871             playliststart = self.params.get('playliststart', 1) - 1
 872             playlistend = self.params.get('playlistend')
 873             # For backwards compatibility, interpret -1 as whole list
 874             if playlistend == -1:
 875                 playlistend = None
 876
 877             playlistitems_str = self.params.get('playlist_items')
 878             playlistitems = None
 879             if playlistitems_str is not None:
 880                 def iter_playlistitems(format):
 881                     for string_segment in format.split(','):
 882                         if '-' in string_segment:
 883                             start, end = string_segment.split('-')
 884                             for item in range(int(start), int(end) + 1):
 885                                 yield int(item)
 886                         else:
 887                             yield int(string_segment)
 888                 playlistitems = iter_playlistitems(playlistitems_str)
 889
 890             ie_entries = ie_result['entries']
 891             if isinstance(ie_entries, list):
 892                 n_all_entries = len(ie_entries)
 893                 if playlistitems:
 894                     entries = [
 895                         ie_entries[i - 1] for i in playlistitems
 896                         if -n_all_entries <= i - 1 < n_all_entries]
 897                 else:
 898                     entries = ie_entries[playliststart:playlistend]
 899                 n_entries = len(entries)
 900                 self.to_screen(
 901                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
 902                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 903             elif isinstance(ie_entries, PagedList):
 904                 if playlistitems:
 905                     entries = []
 906                     for item in playlistitems:
 907                         entries.extend(ie_entries.getslice(
 908                             item - 1, item
 909                         ))
 910                 else:
 911                     entries = ie_entries.getslice(
 912                         playliststart, playlistend)
 913                 n_entries = len(entries)
 914                 self.to_screen(
 915                     '[%s] playlist %s: Downloading %d videos' %
 916                     (ie_result['extractor'], playlist, n_entries))
 917             else:  # iterable
 918                 if playlistitems:
 919                     entry_list = list(ie_entries)
 920                     entries = [entry_list[i - 1] for i in playlistitems]
 921                 else:
 922                     entries = list(itertools.islice(
 923                         ie_entries, playliststart, playlistend))
 924                 n_entries = len(entries)
 925                 self.to_screen(
 926                     '[%s] playlist %s: Downloading %d videos' %
 927                     (ie_result['extractor'], playlist, n_entries))
 928
 929             if self.params.get('playlistreverse', False):
 930                 entries = entries[::-1]
 931
 932             if self.params.get('playlistrandom', False):
 933                 random.shuffle(entries)
 934
 935             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
 936
 937             for i, entry in enumerate(entries, 1):
 938                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 939                 # This __x_forwarded_for_ip thing is a bit ugly but requires
 940                 # minimal changes
 941                 if x_forwarded_for:
 942                     entry['__x_forwarded_for_ip'] = x_forwarded_for
 943                 extra = {
 944                     'n_entries': n_entries,
 945                     'playlist': playlist,
 946                     'playlist_id': ie_result.get('id'),
 947                     'playlist_title': ie_result.get('title'),
 948                     'playlist_index': i + playliststart,
 949                     'extractor': ie_result['extractor'],
 950                     'webpage_url': ie_result['webpage_url'],
 951                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 952                     'extractor_key': ie_result['extractor_key'],
 953                 }
 954
 955                 reason = self._match_entry(entry, incomplete=True)
 956                 if reason is not None:
 957                     self.to_screen('[download] ' + reason)
 958                     continue
 959
 960                 entry_result = self.process_ie_result(entry,
 961                                                       download=download,
 962                                                       extra_info=extra)
 963                 playlist_results.append(entry_result)
 964             ie_result['entries'] = playlist_results
 965             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
 966             return ie_result
 967         elif result_type == 'compat_list':
 968             self.report_warning(
 969                 'Extractor %s returned a compat_list result. '
 970                 'It needs to be updated.' % ie_result.get('extractor'))
 971
 972             def _fixup(r):
 973                 self.add_extra_info(
 974                     r,
 975                     {
 976                         'extractor': ie_result['extractor'],
 977                         'webpage_url': ie_result['webpage_url'],
 978                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 979                         'extractor_key': ie_result['extractor_key'],
 980                     }
 981                 )
 982                 return r
 983             ie_result['entries'] = [
 984                 self.process_ie_result(_fixup(r), download, extra_info)
 985                 for r in ie_result['entries']
 986             ]
 987             return ie_result
 988         else:
 989             raise Exception('Invalid result type: %s' % result_type)
 990
 991     def _build_format_filter(self, filter_spec):
 992         " Returns a function to filter the formats according to the filter_spec "
 993
 994         OPERATORS = {
 995             '<': operator.lt,
 996             '<=': operator.le,
 997             '>': operator.gt,
 998             '>=': operator.ge,
 999             '=': operator.eq,
1000             '!=': operator.ne,
1001         }
1002         operator_rex = re.compile(r'''(?x)\s*
1003             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
1004             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1005             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1006             $
1007             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1008         m = operator_rex.search(filter_spec)
1009         if m:
1010             try:
1011                 comparison_value = int(m.group('value'))
1012             except ValueError:
1013                 comparison_value = parse_filesize(m.group('value'))
1014                 if comparison_value is None:
1015                     comparison_value = parse_filesize(m.group('value') + 'B')
1016                 if comparison_value is None:
1017                     raise ValueError(
1018                         'Invalid value %r in format specification %r' % (
1019                             m.group('value'), filter_spec))
1020             op = OPERATORS[m.group('op')]
1021
1022         if not m:
1023             STR_OPERATORS = {
1024                 '=': operator.eq,
1025                 '!=': operator.ne,
1026                 '^=': lambda attr, value: attr.startswith(value),
1027                 '$=': lambda attr, value: attr.endswith(value),
1028                 '*=': lambda attr, value: value in attr,
1029             }
1030             str_operator_rex = re.compile(r'''(?x)
1031                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1032                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
1033                 \s*(?P<value>[a-zA-Z0-9._-]+)
1034                 \s*$
1035                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1036             m = str_operator_rex.search(filter_spec)
1037             if m:
1038                 comparison_value = m.group('value')
1039                 op = STR_OPERATORS[m.group('op')]
1040
1041         if not m:
1042             raise ValueError('Invalid filter specification %r' % filter_spec)
1043
1044         def _filter(f):
1045             actual_value = f.get(m.group('key'))
1046             if actual_value is None:
1047                 return m.group('none_inclusive')
1048             return op(actual_value, comparison_value)
1049         return _filter
1050
1051     def build_format_selector(self, format_spec):
1052         def syntax_error(note, start):
1053             message = (
1054                 'Invalid format specification: '
1055                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1056             return SyntaxError(message)
1057
1058         PICKFIRST = 'PICKFIRST'
1059         MERGE = 'MERGE'
1060         SINGLE = 'SINGLE'
1061         GROUP = 'GROUP'
1062         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1063
1064         def _parse_filter(tokens):
1065             filter_parts = []
1066             for type, string, start, _, _ in tokens:
1067                 if type == tokenize.OP and string == ']':
1068                     return ''.join(filter_parts)
1069                 else:
1070                     filter_parts.append(string)
1071
1072         def _remove_unused_ops(tokens):
1073             # Remove operators that we don't use and join them with the surrounding strings
1074             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1075             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1076             last_string, last_start, last_end, last_line = None, None, None, None
1077             for type, string, start, end, line in tokens:
1078                 if type == tokenize.OP and string == '[':
1079                     if last_string:
1080                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1081                         last_string = None
1082                     yield type, string, start, end, line
1083                     # everything inside brackets will be handled by _parse_filter
1084                     for type, string, start, end, line in tokens:
1085                         yield type, string, start, end, line
1086                         if type == tokenize.OP and string == ']':
1087                             break
1088                 elif type == tokenize.OP and string in ALLOWED_OPS:
1089                     if last_string:
1090                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1091                         last_string = None
1092                     yield type, string, start, end, line
1093                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1094                     if not last_string:
1095                         last_string = string
1096                         last_start = start
1097                         last_end = end
1098                     else:
1099                         last_string += string
1100             if last_string:
1101                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1102
1103         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1104             selectors = []
1105             current_selector = None
1106             for type, string, start, _, _ in tokens:
1107                 # ENCODING is only defined in python 3.x
1108                 if type == getattr(tokenize, 'ENCODING', None):
1109                     continue
1110                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1111                     current_selector = FormatSelector(SINGLE, string, [])
1112                 elif type == tokenize.OP:
1113                     if string == ')':
1114                         if not inside_group:
1115                             # ')' will be handled by the parentheses group
1116                             tokens.restore_last_token()
1117                         break
1118                     elif inside_merge and string in ['/', ',']:
1119                         tokens.restore_last_token()
1120                         break
1121                     elif inside_choice and string == ',':
1122                         tokens.restore_last_token()
1123                         break
1124                     elif string == ',':
1125                         if not current_selector:
1126                             raise syntax_error('"," must follow a format selector', start)
1127                         selectors.append(current_selector)
1128                         current_selector = None
1129                     elif string == '/':
1130                         if not current_selector:
1131                             raise syntax_error('"/" must follow a format selector', start)
1132                         first_choice = current_selector
1133                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1134                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1135                     elif string == '[':
1136                         if not current_selector:
1137                             current_selector = FormatSelector(SINGLE, 'best', [])
1138                         format_filter = _parse_filter(tokens)
1139                         current_selector.filters.append(format_filter)
1140                     elif string == '(':
1141                         if current_selector:
1142                             raise syntax_error('Unexpected "("', start)
1143                         group = _parse_format_selection(tokens, inside_group=True)
1144                         current_selector = FormatSelector(GROUP, group, [])
1145                     elif string == '+':
1146                         video_selector = current_selector
1147                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1148                         if not video_selector or not audio_selector:
1149                             raise syntax_error('"+" must be between two format selectors', start)
1150                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1151                     else:
1152                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1153                 elif type == tokenize.ENDMARKER:
1154                     break
1155             if current_selector:
1156                 selectors.append(current_selector)
1157             return selectors
1158
1159         def _build_selector_function(selector):
1160             if isinstance(selector, list):
1161                 fs = [_build_selector_function(s) for s in selector]
1162
1163                 def selector_function(ctx):
1164                     for f in fs:
1165                         for format in f(ctx):
1166                             yield format
1167                 return selector_function
1168             elif selector.type == GROUP:
1169                 selector_function = _build_selector_function(selector.selector)
1170             elif selector.type == PICKFIRST:
1171                 fs = [_build_selector_function(s) for s in selector.selector]
1172
1173                 def selector_function(ctx):
1174                     for f in fs:
1175                         picked_formats = list(f(ctx))
1176                         if picked_formats:
1177                             return picked_formats
1178                     return []
1179             elif selector.type == SINGLE:
1180                 format_spec = selector.selector
1181
1182                 def selector_function(ctx):
1183                     formats = list(ctx['formats'])
1184                     if not formats:
1185                         return
1186                     if format_spec == 'all':
1187                         for f in formats:
1188                             yield f
1189                     elif format_spec in ['best', 'worst', None]:
1190                         format_idx = 0 if format_spec == 'worst' else -1
1191                         audiovideo_formats = [
1192                             f for f in formats
1193                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1194                         if audiovideo_formats:
1195                             yield audiovideo_formats[format_idx]
1196                         # for extractors with incomplete formats (audio only (soundcloud)
1197                         # or video only (imgur)) we will fallback to best/worst
1198                         # {video,audio}-only format
1199                         elif ctx['incomplete_formats']:
1200                             yield formats[format_idx]
1201                     elif format_spec == 'bestaudio':
1202                         audio_formats = [
1203                             f for f in formats
1204                             if f.get('vcodec') == 'none']
1205                         if audio_formats:
1206                             yield audio_formats[-1]
1207                     elif format_spec == 'worstaudio':
1208                         audio_formats = [
1209                             f for f in formats
1210                             if f.get('vcodec') == 'none']
1211                         if audio_formats:
1212                             yield audio_formats[0]
1213                     elif format_spec == 'bestvideo':
1214                         video_formats = [
1215                             f for f in formats
1216                             if f.get('acodec') == 'none']
1217                         if video_formats:
1218                             yield video_formats[-1]
1219                     elif format_spec == 'worstvideo':
1220                         video_formats = [
1221                             f for f in formats
1222                             if f.get('acodec') == 'none']
1223                         if video_formats:
1224                             yield video_formats[0]
1225                     else:
1226                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1227                         if format_spec in extensions:
1228                             filter_f = lambda f: f['ext'] == format_spec
1229                         else:
1230                             filter_f = lambda f: f['format_id'] == format_spec
1231                         matches = list(filter(filter_f, formats))
1232                         if matches:
1233                             yield matches[-1]
1234             elif selector.type == MERGE:
1235                 def _merge(formats_info):
1236                     format_1, format_2 = [f['format_id'] for f in formats_info]
1237                     # The first format must contain the video and the
1238                     # second the audio
1239                     if formats_info[0].get('vcodec') == 'none':
1240                         self.report_error('The first format must '
1241                                           'contain the video, try using '
1242                                           '"-f %s+%s"' % (format_2, format_1))
1243                         return
1244                     # Formats must be opposite (video+audio)
1245                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1246                         self.report_error(
1247                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1248                             % (format_1, format_2))
1249                         return
1250                     output_ext = (
1251                         formats_info[0]['ext']
1252                         if self.params.get('merge_output_format') is None
1253                         else self.params['merge_output_format'])
1254                     return {
1255                         'requested_formats': formats_info,
1256                         'format': '%s+%s' % (formats_info[0].get('format'),
1257                                              formats_info[1].get('format')),
1258                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1259                                                 formats_info[1].get('format_id')),
1260                         'width': formats_info[0].get('width'),
1261                         'height': formats_info[0].get('height'),
1262                         'resolution': formats_info[0].get('resolution'),
1263                         'fps': formats_info[0].get('fps'),
1264                         'vcodec': formats_info[0].get('vcodec'),
1265                         'vbr': formats_info[0].get('vbr'),
1266                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1267                         'acodec': formats_info[1].get('acodec'),
1268                         'abr': formats_info[1].get('abr'),
1269                         'ext': output_ext,
1270                     }
1271                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1272
1273                 def selector_function(ctx):
1274                     for pair in itertools.product(
1275                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1276                         yield _merge(pair)
1277
1278             filters = [self._build_format_filter(f) for f in selector.filters]
1279
1280             def final_selector(ctx):
1281                 ctx_copy = copy.deepcopy(ctx)
1282                 for _filter in filters:
1283                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1284                 return selector_function(ctx_copy)
1285             return final_selector
1286
1287         stream = io.BytesIO(format_spec.encode('utf-8'))
1288         try:
1289             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1290         except tokenize.TokenError:
1291             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1292
1293         class TokenIterator(object):
1294             def __init__(self, tokens):
1295                 self.tokens = tokens
1296                 self.counter = 0
1297
1298             def __iter__(self):
1299                 return self
1300
1301             def __next__(self):
1302                 if self.counter >= len(self.tokens):
1303                     raise StopIteration()
1304                 value = self.tokens[self.counter]
1305                 self.counter += 1
1306                 return value
1307
1308             next = __next__
1309
1310             def restore_last_token(self):
1311                 self.counter -= 1
1312
1313         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1314         return _build_selector_function(parsed_selector)
1315
1316     def _calc_headers(self, info_dict):
1317         res = std_headers.copy()
1318
1319         add_headers = info_dict.get('http_headers')
1320         if add_headers:
1321             res.update(add_headers)
1322
1323         cookies = self._calc_cookies(info_dict)
1324         if cookies:
1325             res['Cookie'] = cookies
1326
1327         if 'X-Forwarded-For' not in res:
1328             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1329             if x_forwarded_for_ip:
1330                 res['X-Forwarded-For'] = x_forwarded_for_ip
1331
1332         return res
1333
1334     def _calc_cookies(self, info_dict):
1335         pr = sanitized_Request(info_dict['url'])
1336         self.cookiejar.add_cookie_header(pr)
1337         return pr.get_header('Cookie')
1338
1339     def process_video_result(self, info_dict, download=True):
1340         assert info_dict.get('_type', 'video') == 'video'
1341
1342         if 'id' not in info_dict:
1343             raise ExtractorError('Missing "id" field in extractor result')
1344         if 'title' not in info_dict:
1345             raise ExtractorError('Missing "title" field in extractor result')
1346
1347         if not isinstance(info_dict['id'], compat_str):
1348             self.report_warning('"id" field is not a string - forcing string conversion')
1349             info_dict['id'] = compat_str(info_dict['id'])
1350
1351         if 'playlist' not in info_dict:
1352             # It isn't part of a playlist
1353             info_dict['playlist'] = None
1354             info_dict['playlist_index'] = None
1355
1356         thumbnails = info_dict.get('thumbnails')
1357         if thumbnails is None:
1358             thumbnail = info_dict.get('thumbnail')
1359             if thumbnail:
1360                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1361         if thumbnails:
1362             thumbnails.sort(key=lambda t: (
1363                 t.get('preference') if t.get('preference') is not None else -1,
1364                 t.get('width') if t.get('width') is not None else -1,
1365                 t.get('height') if t.get('height') is not None else -1,
1366                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1367             for i, t in enumerate(thumbnails):
1368                 t['url'] = sanitize_url(t['url'])
1369                 if t.get('width') and t.get('height'):
1370                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1371                 if t.get('id') is None:
1372                     t['id'] = '%d' % i
1373
1374         if self.params.get('list_thumbnails'):
1375             self.list_thumbnails(info_dict)
1376             return
1377
1378         thumbnail = info_dict.get('thumbnail')
1379         if thumbnail:
1380             info_dict['thumbnail'] = sanitize_url(thumbnail)
1381         elif thumbnails:
1382             info_dict['thumbnail'] = thumbnails[-1]['url']
1383
1384         if 'display_id' not in info_dict and 'id' in info_dict:
1385             info_dict['display_id'] = info_dict['id']
1386
1387         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1388             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1389             # see http://bugs.python.org/issue1646728)
1390             try:
1391                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1392                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1393             except (ValueError, OverflowError, OSError):
1394                 pass
1395
1396         # Auto generate title fields corresponding to the *_number fields when missing
1397         # in order to always have clean titles. This is very common for TV series.
1398         for field in ('chapter', 'season', 'episode'):
1399             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1400                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1401
1402         subtitles = info_dict.get('subtitles')
1403         if subtitles:
1404             for _, subtitle in subtitles.items():
1405                 for subtitle_format in subtitle:
1406                     if subtitle_format.get('url'):
1407                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1408                     if subtitle_format.get('ext') is None:
1409                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1410
1411         if self.params.get('listsubtitles', False):
1412             if 'automatic_captions' in info_dict:
1413                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1414             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1415             return
1416         info_dict['requested_subtitles'] = self.process_subtitles(
1417             info_dict['id'], subtitles,
1418             info_dict.get('automatic_captions'))
1419
1420         # We now pick which formats have to be downloaded
1421         if info_dict.get('formats') is None:
1422             # There's only one format available
1423             formats = [info_dict]
1424         else:
1425             formats = info_dict['formats']
1426
1427         if not formats:
1428             raise ExtractorError('No video formats found!')
1429
1430         formats_dict = {}
1431
1432         # We check that all the formats have the format and format_id fields
1433         for i, format in enumerate(formats):
1434             if 'url' not in format:
1435                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1436
1437             format['url'] = sanitize_url(format['url'])
1438
1439             if format.get('format_id') is None:
1440                 format['format_id'] = compat_str(i)
1441             else:
1442                 # Sanitize format_id from characters used in format selector expression
1443                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1444             format_id = format['format_id']
1445             if format_id not in formats_dict:
1446                 formats_dict[format_id] = []
1447             formats_dict[format_id].append(format)
1448
1449         # Make sure all formats have unique format_id
1450         for format_id, ambiguous_formats in formats_dict.items():
1451             if len(ambiguous_formats) > 1:
1452                 for i, format in enumerate(ambiguous_formats):
1453                     format['format_id'] = '%s-%d' % (format_id, i)
1454
1455         for i, format in enumerate(formats):
1456             if format.get('format') is None:
1457                 format['format'] = '{id} - {res}{note}'.format(
1458                     id=format['format_id'],
1459                     res=self.format_resolution(format),
1460                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1461                 )
1462             # Automatically determine file extension if missing
1463             if format.get('ext') is None:
1464                 format['ext'] = determine_ext(format['url']).lower()
1465             # Automatically determine protocol if missing (useful for format
1466             # selection purposes)
1467             if format.get('protocol') is None:
1468                 format['protocol'] = determine_protocol(format)
1469             # Add HTTP headers, so that external programs can use them from the
1470             # json output
1471             full_format_info = info_dict.copy()
1472             full_format_info.update(format)
1473             format['http_headers'] = self._calc_headers(full_format_info)
1474         # Remove private housekeeping stuff
1475         if '__x_forwarded_for_ip' in info_dict:
1476             del info_dict['__x_forwarded_for_ip']
1477
1478         # TODO Central sorting goes here
1479
1480         if formats[0] is not info_dict:
1481             # only set the 'formats' fields if the original info_dict list them
1482             # otherwise we end up with a circular reference, the first (and unique)
1483             # element in the 'formats' field in info_dict is info_dict itself,
1484             # which can't be exported to json
1485             info_dict['formats'] = formats
1486         if self.params.get('listformats'):
1487             self.list_formats(info_dict)
1488             return
1489
1490         req_format = self.params.get('format')
1491         if req_format is None:
1492             req_format_list = []
1493             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1494                     not info_dict.get('is_live')):
1495                 merger = FFmpegMergerPP(self)
1496                 if merger.available and merger.can_merge():
1497                     req_format_list.append('bestvideo+bestaudio')
1498             req_format_list.append('best')
1499             req_format = '/'.join(req_format_list)
1500         format_selector = self.build_format_selector(req_format)
1501
1502         # While in format selection we may need to have an access to the original
1503         # format set in order to calculate some metrics or do some processing.
1504         # For now we need to be able to guess whether original formats provided
1505         # by extractor are incomplete or not (i.e. whether extractor provides only
1506         # video-only or audio-only formats) for proper formats selection for
1507         # extractors with such incomplete formats (see
1508         # https://github.com/rg3/youtube-dl/pull/5556).
1509         # Since formats may be filtered during format selection and may not match
1510         # the original formats the results may be incorrect. Thus original formats
1511         # or pre-calculated metrics should be passed to format selection routines
1512         # as well.
1513         # We will pass a context object containing all necessary additional data
1514         # instead of just formats.
1515         # This fixes incorrect format selection issue (see
1516         # https://github.com/rg3/youtube-dl/issues/10083).
1517         incomplete_formats = (
1518             # All formats are video-only or
1519             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
1520             # all formats are audio-only
1521             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1522
1523         ctx = {
1524             'formats': formats,
1525             'incomplete_formats': incomplete_formats,
1526         }
1527
1528         formats_to_download = list(format_selector(ctx))
1529         if not formats_to_download:
1530             raise ExtractorError('requested format not available',
1531                                  expected=True)
1532
1533         if download:
1534             if len(formats_to_download) > 1:
1535                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1536             for format in formats_to_download:
1537                 new_info = dict(info_dict)
1538                 new_info.update(format)
1539                 self.process_info(new_info)
1540         # We update the info dict with the best quality format (backwards compatibility)
1541         info_dict.update(formats_to_download[-1])
1542         return info_dict
1543
1544     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1545         """Select the requested subtitles and their format"""
1546         available_subs = {}
1547         if normal_subtitles and self.params.get('writesubtitles'):
1548             available_subs.update(normal_subtitles)
1549         if automatic_captions and self.params.get('writeautomaticsub'):
1550             for lang, cap_info in automatic_captions.items():
1551                 if lang not in available_subs:
1552                     available_subs[lang] = cap_info
1553
1554         if (not self.params.get('writesubtitles') and not
1555                 self.params.get('writeautomaticsub') or not
1556                 available_subs):
1557             return None
1558
1559         if self.params.get('allsubtitles', False):
1560             requested_langs = available_subs.keys()
1561         else:
1562             if self.params.get('subtitleslangs', False):
1563                 requested_langs = self.params.get('subtitleslangs')
1564             elif 'en' in available_subs:
1565                 requested_langs = ['en']
1566             else:
1567                 requested_langs = [list(available_subs.keys())[0]]
1568
1569         formats_query = self.params.get('subtitlesformat', 'best')
1570         formats_preference = formats_query.split('/') if formats_query else []
1571         subs = {}
1572         for lang in requested_langs:
1573             formats = available_subs.get(lang)
1574             if formats is None:
1575                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1576                 continue
1577             for ext in formats_preference:
1578                 if ext == 'best':
1579                     f = formats[-1]
1580                     break
1581                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1582                 if matches:
1583                     f = matches[-1]
1584                     break
1585             else:
1586                 f = formats[-1]
1587                 self.report_warning(
1588                     'No subtitle format found matching "%s" for language %s, '
1589                     'using %s' % (formats_query, lang, f['ext']))
1590             subs[lang] = f
1591         return subs
1592
1593     def process_info(self, info_dict):
1594         """Process a single resolved IE result."""
1595
1596         assert info_dict.get('_type', 'video') == 'video'
1597
1598         max_downloads = self.params.get('max_downloads')
1599         if max_downloads is not None:
1600             if self._num_downloads >= int(max_downloads):
1601                 raise MaxDownloadsReached()
1602
1603         info_dict['fulltitle'] = info_dict['title']
1604         if len(info_dict['title']) > 200:
1605             info_dict['title'] = info_dict['title'][:197] + '...'
1606
1607         if 'format' not in info_dict:
1608             info_dict['format'] = info_dict['ext']
1609
1610         reason = self._match_entry(info_dict, incomplete=False)
1611         if reason is not None:
1612             self.to_screen('[download] ' + reason)
1613             return
1614
1615         self._num_downloads += 1
1616
1617         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1618
1619         # Forced printings
1620         if self.params.get('forcetitle', False):
1621             self.to_stdout(info_dict['fulltitle'])
1622         if self.params.get('forceid', False):
1623             self.to_stdout(info_dict['id'])
1624         if self.params.get('forceurl', False):
1625             if info_dict.get('requested_formats') is not None:
1626                 for f in info_dict['requested_formats']:
1627                     self.to_stdout(f['url'] + f.get('play_path', ''))
1628             else:
1629                 # For RTMP URLs, also include the playpath
1630                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1631         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1632             self.to_stdout(info_dict['thumbnail'])
1633         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1634             self.to_stdout(info_dict['description'])
1635         if self.params.get('forcefilename', False) and filename is not None:
1636             self.to_stdout(filename)
1637         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1638             self.to_stdout(formatSeconds(info_dict['duration']))
1639         if self.params.get('forceformat', False):
1640             self.to_stdout(info_dict['format'])
1641         if self.params.get('forcejson', False):
1642             self.to_stdout(json.dumps(info_dict))
1643
1644         # Do nothing else if in simulate mode
1645         if self.params.get('simulate', False):
1646             return
1647
1648         if filename is None:
1649             return
1650
1651         try:
1652             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1653             if dn and not os.path.exists(dn):
1654                 os.makedirs(dn)
1655         except (OSError, IOError) as err:
1656             self.report_error('unable to create directory ' + error_to_compat_str(err))
1657             return
1658
1659         if self.params.get('writedescription', False):
1660             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1661             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1662                 self.to_screen('[info] Video description is already present')
1663             elif info_dict.get('description') is None:
1664                 self.report_warning('There\'s no description to write.')
1665             else:
1666                 try:
1667                     self.to_screen('[info] Writing video description to: ' + descfn)
1668                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1669                         descfile.write(info_dict['description'])
1670                 except (OSError, IOError):
1671                     self.report_error('Cannot write description file ' + descfn)
1672                     return
1673
1674         if self.params.get('writeannotations', False):
1675             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1676             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1677                 self.to_screen('[info] Video annotations are already present')
1678             else:
1679                 try:
1680                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1681                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1682                         annofile.write(info_dict['annotations'])
1683                 except (KeyError, TypeError):
1684                     self.report_warning('There are no annotations to write.')
1685                 except (OSError, IOError):
1686                     self.report_error('Cannot write annotations file: ' + annofn)
1687                     return
1688
1689         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1690                                        self.params.get('writeautomaticsub')])
1691
1692         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1693             # subtitles download errors are already managed as troubles in relevant IE
1694             # that way it will silently go on when used with unsupporting IE
1695             subtitles = info_dict['requested_subtitles']
1696             ie = self.get_info_extractor(info_dict['extractor_key'])
1697             for sub_lang, sub_info in subtitles.items():
1698                 sub_format = sub_info['ext']
1699                 if sub_info.get('data') is not None:
1700                     sub_data = sub_info['data']
1701                 else:
1702                     try:
1703                         sub_data = ie._download_webpage(
1704                             sub_info['url'], info_dict['id'], note=False)
1705                     except ExtractorError as err:
1706                         self.report_warning('Unable to download subtitle for "%s": %s' %
1707                                             (sub_lang, error_to_compat_str(err.cause)))
1708                         continue
1709                 try:
1710                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1711                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1712                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1713                     else:
1714                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1715                         # Use newline='' to prevent conversion of newline characters
1716                         # See https://github.com/rg3/youtube-dl/issues/10268
1717                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1718                             subfile.write(sub_data)
1719                 except (OSError, IOError):
1720                     self.report_error('Cannot write subtitles file ' + sub_filename)
1721                     return
1722
1723         if self.params.get('writeinfojson', False):
1724             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1725             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1726                 self.to_screen('[info] Video description metadata is already present')
1727             else:
1728                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1729                 try:
1730                     write_json_file(self.filter_requested_info(info_dict), infofn)
1731                 except (OSError, IOError):
1732                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1733                     return
1734
1735         self._write_thumbnails(info_dict, filename)
1736
1737         if not self.params.get('skip_download', False):
1738             try:
1739                 def dl(name, info):
1740                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1741                     for ph in self._progress_hooks:
1742                         fd.add_progress_hook(ph)
1743                     if self.params.get('verbose'):
1744                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1745                     return fd.download(name, info)
1746
1747                 if info_dict.get('requested_formats') is not None:
1748                     downloaded = []
1749                     success = True
1750                     merger = FFmpegMergerPP(self)
1751                     if not merger.available:
1752                         postprocessors = []
1753                         self.report_warning('You have requested multiple '
1754                                             'formats but ffmpeg or avconv are not installed.'
1755                                             ' The formats won\'t be merged.')
1756                     else:
1757                         postprocessors = [merger]
1758
1759                     def compatible_formats(formats):
1760                         video, audio = formats
1761                         # Check extension
1762                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1763                         if video_ext and audio_ext:
1764                             COMPATIBLE_EXTS = (
1765                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1766                                 ('webm')
1767                             )
1768                             for exts in COMPATIBLE_EXTS:
1769                                 if video_ext in exts and audio_ext in exts:
1770                                     return True
1771                         # TODO: Check acodec/vcodec
1772                         return False
1773
1774                     filename_real_ext = os.path.splitext(filename)[1][1:]
1775                     filename_wo_ext = (
1776                         os.path.splitext(filename)[0]
1777                         if filename_real_ext == info_dict['ext']
1778                         else filename)
1779                     requested_formats = info_dict['requested_formats']
1780                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1781                         info_dict['ext'] = 'mkv'
1782                         self.report_warning(
1783                             'Requested formats are incompatible for merge and will be merged into mkv.')
1784                     # Ensure filename always has a correct extension for successful merge
1785                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1786                     if os.path.exists(encodeFilename(filename)):
1787                         self.to_screen(
1788                             '[download] %s has already been downloaded and '
1789                             'merged' % filename)
1790                     else:
1791                         for f in requested_formats:
1792                             new_info = dict(info_dict)
1793                             new_info.update(f)
1794                             fname = self.prepare_filename(new_info)
1795                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1796                             downloaded.append(fname)
1797                             partial_success = dl(fname, new_info)
1798                             success = success and partial_success
1799                         info_dict['__postprocessors'] = postprocessors
1800                         info_dict['__files_to_merge'] = downloaded
1801                 else:
1802                     # Just a single file
1803                     success = dl(filename, info_dict)
1804             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1805                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1806                 return
1807             except (OSError, IOError) as err:
1808                 raise UnavailableVideoError(err)
1809             except (ContentTooShortError, ) as err:
1810                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1811                 return
1812
1813             if success and filename != '-':
1814                 # Fixup content
1815                 fixup_policy = self.params.get('fixup')
1816                 if fixup_policy is None:
1817                     fixup_policy = 'detect_or_warn'
1818
1819                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1820
1821                 stretched_ratio = info_dict.get('stretched_ratio')
1822                 if stretched_ratio is not None and stretched_ratio != 1:
1823                     if fixup_policy == 'warn':
1824                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1825                             info_dict['id'], stretched_ratio))
1826                     elif fixup_policy == 'detect_or_warn':
1827                         stretched_pp = FFmpegFixupStretchedPP(self)
1828                         if stretched_pp.available:
1829                             info_dict.setdefault('__postprocessors', [])
1830                             info_dict['__postprocessors'].append(stretched_pp)
1831                         else:
1832                             self.report_warning(
1833                                 '%s: Non-uniform pixel ratio (%s). %s'
1834                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1835                     else:
1836                         assert fixup_policy in ('ignore', 'never')
1837
1838                 if (info_dict.get('requested_formats') is None and
1839                         info_dict.get('container') == 'm4a_dash'):
1840                     if fixup_policy == 'warn':
1841                         self.report_warning(
1842                             '%s: writing DASH m4a. '
1843                             'Only some players support this container.'
1844                             % info_dict['id'])
1845                     elif fixup_policy == 'detect_or_warn':
1846                         fixup_pp = FFmpegFixupM4aPP(self)
1847                         if fixup_pp.available:
1848                             info_dict.setdefault('__postprocessors', [])
1849                             info_dict['__postprocessors'].append(fixup_pp)
1850                         else:
1851                             self.report_warning(
1852                                 '%s: writing DASH m4a. '
1853                                 'Only some players support this container. %s'
1854                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1855                     else:
1856                         assert fixup_policy in ('ignore', 'never')
1857
1858                 if (info_dict.get('protocol') == 'm3u8_native' or
1859                         info_dict.get('protocol') == 'm3u8' and
1860                         self.params.get('hls_prefer_native')):
1861                     if fixup_policy == 'warn':
1862                         self.report_warning('%s: malformated aac bitstream.' % (
1863                             info_dict['id']))
1864                     elif fixup_policy == 'detect_or_warn':
1865                         fixup_pp = FFmpegFixupM3u8PP(self)
1866                         if fixup_pp.available:
1867                             info_dict.setdefault('__postprocessors', [])
1868                             info_dict['__postprocessors'].append(fixup_pp)
1869                         else:
1870                             self.report_warning(
1871                                 '%s: malformated aac bitstream. %s'
1872                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1873                     else:
1874                         assert fixup_policy in ('ignore', 'never')
1875
1876                 try:
1877                     self.post_process(filename, info_dict)
1878                 except (PostProcessingError) as err:
1879                     self.report_error('postprocessing: %s' % str(err))
1880                     return
1881                 self.record_download_archive(info_dict)
1882
1883     def download(self, url_list):
1884         """Download a given list of URLs."""
1885         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1886         if (len(url_list) > 1 and
1887                 outtmpl != '-' and
1888                 '%' not in outtmpl and
1889                 self.params.get('max_downloads') != 1):
1890             raise SameFileError(outtmpl)
1891
1892         for url in url_list:
1893             try:
1894                 # It also downloads the videos
1895                 res = self.extract_info(
1896                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1897             except UnavailableVideoError:
1898                 self.report_error('unable to download video')
1899             except MaxDownloadsReached:
1900                 self.to_screen('[info] Maximum number of downloaded files reached.')
1901                 raise
1902             else:
1903                 if self.params.get('dump_single_json', False):
1904                     self.to_stdout(json.dumps(res))
1905
1906         return self._download_retcode
1907
1908     def download_with_info_file(self, info_filename):
1909         with contextlib.closing(fileinput.FileInput(
1910                 [info_filename], mode='r',
1911                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1912             # FileInput doesn't have a read method, we can't call json.load
1913             info = self.filter_requested_info(json.loads('\n'.join(f)))
1914         try:
1915             self.process_ie_result(info, download=True)
1916         except DownloadError:
1917             webpage_url = info.get('webpage_url')
1918             if webpage_url is not None:
1919                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1920                 return self.download([webpage_url])
1921             else:
1922                 raise
1923         return self._download_retcode
1924
1925     @staticmethod
1926     def filter_requested_info(info_dict):
1927         return dict(
1928             (k, v) for k, v in info_dict.items()
1929             if k not in ['requested_formats', 'requested_subtitles'])
1930
1931     def post_process(self, filename, ie_info):
1932         """Run all the postprocessors on the given file."""
1933         info = dict(ie_info)
1934         info['filepath'] = filename
1935         pps_chain = []
1936         if ie_info.get('__postprocessors') is not None:
1937             pps_chain.extend(ie_info['__postprocessors'])
1938         pps_chain.extend(self._pps)
1939         for pp in pps_chain:
1940             files_to_delete = []
1941             try:
1942                 files_to_delete, info = pp.run(info)
1943             except PostProcessingError as e:
1944                 self.report_error(e.msg)
1945             if files_to_delete and not self.params.get('keepvideo', False):
1946                 for old_filename in files_to_delete:
1947                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1948                     try:
1949                         os.remove(encodeFilename(old_filename))
1950                     except (IOError, OSError):
1951                         self.report_warning('Unable to remove downloaded original file')
1952
1953     def _make_archive_id(self, info_dict):
1954         # Future-proof against any change in case
1955         # and backwards compatibility with prior versions
1956         extractor = info_dict.get('extractor_key')
1957         if extractor is None:
1958             if 'id' in info_dict:
1959                 extractor = info_dict.get('ie_key')  # key in a playlist
1960         if extractor is None:
1961             return None  # Incomplete video information
1962         return extractor.lower() + ' ' + info_dict['id']
1963
1964     def in_download_archive(self, info_dict):
1965         fn = self.params.get('download_archive')
1966         if fn is None:
1967             return False
1968
1969         vid_id = self._make_archive_id(info_dict)
1970         if vid_id is None:
1971             return False  # Incomplete video information
1972
1973         try:
1974             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1975                 for line in archive_file:
1976                     if line.strip() == vid_id:
1977                         return True
1978         except IOError as ioe:
1979             if ioe.errno != errno.ENOENT:
1980                 raise
1981         return False
1982
1983     def record_download_archive(self, info_dict):
1984         fn = self.params.get('download_archive')
1985         if fn is None:
1986             return
1987         vid_id = self._make_archive_id(info_dict)
1988         assert vid_id
1989         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1990             archive_file.write(vid_id + '\n')
1991
1992     @staticmethod
1993     def format_resolution(format, default='unknown'):
1994         if format.get('vcodec') == 'none':
1995             return 'audio only'
1996         if format.get('resolution') is not None:
1997             return format['resolution']
1998         if format.get('height') is not None:
1999             if format.get('width') is not None:
2000                 res = '%sx%s' % (format['width'], format['height'])
2001             else:
2002                 res = '%sp' % format['height']
2003         elif format.get('width') is not None:
2004             res = '%dx?' % format['width']
2005         else:
2006             res = default
2007         return res
2008
2009     def _format_note(self, fdict):
2010         res = ''
2011         if fdict.get('ext') in ['f4f', 'f4m']:
2012             res += '(unsupported) '
2013         if fdict.get('language'):
2014             if res:
2015                 res += ' '
2016             res += '[%s] ' % fdict['language']
2017         if fdict.get('format_note') is not None:
2018             res += fdict['format_note'] + ' '
2019         if fdict.get('tbr') is not None:
2020             res += '%4dk ' % fdict['tbr']
2021         if fdict.get('container') is not None:
2022             if res:
2023                 res += ', '
2024             res += '%s container' % fdict['container']
2025         if (fdict.get('vcodec') is not None and
2026                 fdict.get('vcodec') != 'none'):
2027             if res:
2028                 res += ', '
2029             res += fdict['vcodec']
2030             if fdict.get('vbr') is not None:
2031                 res += '@'
2032         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2033             res += 'video@'
2034         if fdict.get('vbr') is not None:
2035             res += '%4dk' % fdict['vbr']
2036         if fdict.get('fps') is not None:
2037             if res:
2038                 res += ', '
2039             res += '%sfps' % fdict['fps']
2040         if fdict.get('acodec') is not None:
2041             if res:
2042                 res += ', '
2043             if fdict['acodec'] == 'none':
2044                 res += 'video only'
2045             else:
2046                 res += '%-5s' % fdict['acodec']
2047         elif fdict.get('abr') is not None:
2048             if res:
2049                 res += ', '
2050             res += 'audio'
2051         if fdict.get('abr') is not None:
2052             res += '@%3dk' % fdict['abr']
2053         if fdict.get('asr') is not None:
2054             res += ' (%5dHz)' % fdict['asr']
2055         if fdict.get('filesize') is not None:
2056             if res:
2057                 res += ', '
2058             res += format_bytes(fdict['filesize'])
2059         elif fdict.get('filesize_approx') is not None:
2060             if res:
2061                 res += ', '
2062             res += '~' + format_bytes(fdict['filesize_approx'])
2063         return res
2064
2065     def list_formats(self, info_dict):
2066         formats = info_dict.get('formats', [info_dict])
2067         table = [
2068             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2069             for f in formats
2070             if f.get('preference') is None or f['preference'] >= -1000]
2071         if len(formats) > 1:
2072             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2073
2074         header_line = ['format code', 'extension', 'resolution', 'note']
2075         self.to_screen(
2076             '[info] Available formats for %s:\n%s' %
2077             (info_dict['id'], render_table(header_line, table)))
2078
2079     def list_thumbnails(self, info_dict):
2080         thumbnails = info_dict.get('thumbnails')
2081         if not thumbnails:
2082             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2083             return
2084
2085         self.to_screen(
2086             '[info] Thumbnails for %s:' % info_dict['id'])
2087         self.to_screen(render_table(
2088             ['ID', 'width', 'height', 'URL'],
2089             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2090
2091     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2092         if not subtitles:
2093             self.to_screen('%s has no %s' % (video_id, name))
2094             return
2095         self.to_screen(
2096             'Available %s for %s:' % (name, video_id))
2097         self.to_screen(render_table(
2098             ['Language', 'formats'],
2099             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2100                 for lang, formats in subtitles.items()]))
2101
2102     def urlopen(self, req):
2103         """ Start an HTTP download """
2104         if isinstance(req, compat_basestring):
2105             req = sanitized_Request(req)
2106         return self._opener.open(req, timeout=self._socket_timeout)
2107
    def print_debug_header(self):
        """Write the '[debug]' header lines describing the runtime environment.

        Does nothing unless the 'verbose' option is set. Otherwise reports, in
        order: encodings, youtube-dl version, lazy extractor loading, git HEAD
        (when it can be determined), Python version, versions of external
        executables and the proxy map. With the 'call_home' option it also
        fetches the public IP address and the latest released version from
        yt-dl.org and warns when the running version is older.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may be replaced by an object without an 'encoding'
        # attribute; report the type of that object in this case
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: ask git for the current commit, running it in the
        # directory containing this file. Any failure is silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # re.match only anchors at the start, so this checks that the
            # output begins with at least one hexadecimal digit
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear() only exists on Python 2; where it is missing
            # the resulting error is ignored as well
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        # Only executables with a truthy version value are listed
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the 'proxies' mapping of every opener handler that has one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2174
    def _setup_opener(self):
        """Build the URL opener used for HTTP(S) requests.

        Sets self._socket_timeout, self.cookiejar and self._opener from the
        'socket_timeout', 'cookiefile', 'proxy' and 'debug_printtraffic'
        options.
        """
        # The timeout defaults to 600 when the option is not given
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Existing cookies are only loaded when the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicitly empty proxy option results in an empty proxy map;
            # any other value is used for both http and https
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No proxy option given: take whatever getproxies() reports
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        # Replacement for FileHandler.file_open that rejects every request
        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2227
2228     def encode(self, s):
2229         if isinstance(s, bytes):
2230             return s  # Already encoded
2231
2232         try:
2233             return s.encode(self.get_encoding())
2234         except UnicodeEncodeError as err:
2235             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2236             raise
2237
2238     def get_encoding(self):
2239         encoding = self.params.get('encoding')
2240         if encoding is None:
2241             encoding = preferredencoding()
2242         return encoding
2243
2244     def _write_thumbnails(self, info_dict, filename):
2245         if self.params.get('writethumbnail', False):
2246             thumbnails = info_dict.get('thumbnails')
2247             if thumbnails:
2248                 thumbnails = [thumbnails[-1]]
2249         elif self.params.get('write_all_thumbnails', False):
2250             thumbnails = info_dict.get('thumbnails')
2251         else:
2252             return
2253
2254         if not thumbnails:
2255             # No thumbnails present, so return immediately
2256             return
2257
2258         for t in thumbnails:
2259             thumb_ext = determine_ext(t['url'], 'jpg')
2260             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2261             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2262             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2263
2264             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2265                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2266                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2267             else:
2268                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2269                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2270                 try:
2271                     uf = self.urlopen(t['url'])
2272                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2273                         shutil.copyfileobj(uf, thumbf)
2274                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2275                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2276                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2277                     self.report_warning('Unable to download thumbnail "%s": %s' %
2278                                         (t['url'], error_to_compat_str(err)))