Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .utils import (
  26     compat_cookiejar,
  27     compat_http_client,
  28     compat_str,
  29     compat_urllib_error,
  30     compat_urllib_request,
  31     ContentTooShortError,
  32     date_from_str,
  33     DateRange,
  34     DEFAULT_OUTTMPL,
  35     determine_ext,
  36     DownloadError,
  37     encodeFilename,
  38     ExtractorError,
  39     format_bytes,
  40     formatSeconds,
  41     get_term_width,
  42     locked_file,
  43     make_HTTPS_handler,
  44     MaxDownloadsReached,
  45     PagedList,
  46     PostProcessingError,
  47     platform_name,
  48     preferredencoding,
  49     SameFileError,
  50     sanitize_filename,
  51     subtitles_filename,
  52     takewhile_inclusive,
  53     UnavailableVideoError,
  54     url_basename,
  55     write_json_file,
  56     write_string,
  57     YoutubeDLHandler,
  58     prepend_extension,
  59 )
  60 from .extractor import get_info_extractor, gen_extractors
  61 from .downloader import get_suitable_downloader
  62 from .postprocessor import FFmpegMergerPP
  63 from .version import __version__
  64
  65
  66 class YoutubeDL(object):
  67     """YoutubeDL class.
  68
    YoutubeDL objects are the ones responsible for downloading the
  70     actual video file and writing it to disk if the user has requested
  71     it, among some other tasks. In most cases there should be one per
  72     program. As, given a video URL, the downloader doesn't know how to
  73     extract all the needed information, task that InfoExtractors do, it
  74     has to pass the URL to one of them.
  75
  76     For this, YoutubeDL objects have a method that allows
  77     InfoExtractors to be registered in a given order. When it is passed
  78     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  79     finds that reports being able to handle it. The InfoExtractor extracts
  80     all the information about the video or videos the URL refers to, and
  81     YoutubeDL process the extracted information, possibly using a File
  82     Downloader to download the video.
  83
  84     YoutubeDL objects accept a lot of parameters. In order not to saturate
  85     the object constructor with arguments, it receives a dictionary of
  86     options instead. These options are available through the params
  87     attribute for the InfoExtractors to use. The YoutubeDL also
  88     registers itself as the downloader in charge for the InfoExtractors
  89     that are added to it, so this is a "mutual registration".
  90
  91     Available options:
  92
  93     username:          Username for authentication purposes.
  94     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
  96     usenetrc:          Use netrc for authentication instead.
  97     verbose:           Print additional info to stdout.
  98     quiet:             Do not print messages to stdout.
  99     no_warnings:       Do not print out anything for warnings.
 100     forceurl:          Force printing final URL.
 101     forcetitle:        Force printing title.
 102     forceid:           Force printing ID.
 103     forcethumbnail:    Force printing thumbnail URL.
 104     forcedescription:  Force printing description.
 105     forcefilename:     Force printing final filename.
 106     forceduration:     Force printing duration.
 107     forcejson:         Force printing info_dict as JSON.
 108     simulate:          Do not download the video files.
 109     format:            Video format code.
 110     format_limit:      Highest quality format to try.
 111     outtmpl:           Template for output names.
 112     restrictfilenames: Do not allow "&" and spaces in file names
 113     ignoreerrors:      Do not stop on download errors.
 114     nooverwrites:      Prevent overwriting files.
 115     playliststart:     Playlist item to start at.
 116     playlistend:       Playlist item to end at.
 117     matchtitle:        Download only matching titles.
 118     rejecttitle:       Reject downloads for matching titles.
 119     logger:            Log messages to a logging.Logger instance.
 120     logtostderr:       Log messages to stderr instead of stdout.
 121     writedescription:  Write the video description to a .description file
 122     writeinfojson:     Write the video description to a .info.json file
 123     writeannotations:  Write the video annotations to a .annotations.xml file
 124     writethumbnail:    Write the thumbnail image to a file
 125     writesubtitles:    Write the video subtitles to a file
 126     writeautomaticsub: Write the automatic subtitles to a file
 127     allsubtitles:      Downloads all the subtitles of the video
 128                        (requires writesubtitles or writeautomaticsub)
 129     listsubtitles:     Lists all available subtitles for the video
 130     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 131     subtitleslangs:    List of languages of the subtitles to download
 132     keepvideo:         Keep the video file after post-processing
 133     daterange:         A DateRange object, download only if the upload_date is in the range.
 134     skip_download:     Skip the actual download of the video file
 135     cachedir:          Location of the cache files in the filesystem.
 136                        None to disable filesystem cache.
 137     noplaylist:        Download single video instead of a playlist if in doubt.
 138     age_limit:         An integer representing the user's age in years.
 139                        Unsuitable videos for the given age are skipped.
 140     min_views:         An integer representing the minimum view count the video
 141                        must have in order to not be skipped.
 142                        Videos without view count information are always
 143                        downloaded. None for no limit.
 144     max_views:         An integer representing the maximum view count.
 145                        Videos that are more popular than that are not
 146                        downloaded.
 147                        Videos without view count information are always
 148                        downloaded. None for no limit.
 149     download_archive:  File name of a file where all downloads are recorded.
 150                        Videos already present in the file are not downloaded
 151                        again.
 152     cookiefile:        File name where cookies should be read from and dumped to.
 153     nocheckcertificate:Do not verify SSL certificates
 154     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 155                        At the moment, this is only supported by YouTube.
 156     proxy:             URL of the proxy server to use
 157     socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 160     debug_printtraffic:Print out sent and received HTTP traffic
 161     include_ads:       Download ads as well
 162     default_search:    Prepend this string if an input url is not valid.
 163                        'auto' for elaborate guessing
 164     encoding:          Use this encoding instead of the system-specified.
 165
 166     The following parameters are not used by YoutubeDL itself, they are used by
 167     the FileDownloader:
 168     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 169     noresizebuffer, retries, continuedl, noprogress, consoletitle
 170
 171     The following options are used by the post processors:
 172     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 173                        otherwise prefer avconv.
 174     """
 175
    # Class-level placeholders. __init__ rebinds every one of these names on
    # the instance, so instances built through the constructor do not share
    # the two list objects below.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 182
 183     def __init__(self, params=None):
 184         """Create a FileDownloader object with the given options."""
 185         if params is None:
 186             params = {}
 187         self._ies = []
 188         self._ies_instances = {}
 189         self._pps = []
 190         self._progress_hooks = []
 191         self._download_retcode = 0
 192         self._num_downloads = 0
 193         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 194         self._err_file = sys.stderr
 195         self.params = params
 196
 197         if params.get('bidi_workaround', False):
 198             try:
 199                 import pty
 200                 master, slave = pty.openpty()
 201                 width = get_term_width()
 202                 if width is None:
 203                     width_args = []
 204                 else:
 205                     width_args = ['-w', str(width)]
 206                 sp_kwargs = dict(
 207                     stdin=subprocess.PIPE,
 208                     stdout=slave,
 209                     stderr=self._err_file)
 210                 try:
 211                     self._output_process = subprocess.Popen(
 212                         ['bidiv'] + width_args, **sp_kwargs
 213                     )
 214                 except OSError:
 215                     self._output_process = subprocess.Popen(
 216                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 217                 self._output_channel = os.fdopen(master, 'rb')
 218             except OSError as ose:
 219                 if ose.errno == 2:
 220                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 221                 else:
 222                     raise
 223
 224         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 225                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 226                 and not params['restrictfilenames']):
 227             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 228             self.report_warning(
 229                 'Assuming --restrict-filenames since file system encoding '
 230                 'cannot encode all charactes. '
 231                 'Set the LC_ALL environment variable to fix this.')
 232             self.params['restrictfilenames'] = True
 233
 234         if '%(stitle)s' in self.params.get('outtmpl', ''):
 235             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 236
 237         self._setup_opener()
 238
 239     def add_info_extractor(self, ie):
 240         """Add an InfoExtractor object to the end of the list."""
 241         self._ies.append(ie)
 242         self._ies_instances[ie.ie_key()] = ie
 243         ie.set_downloader(self)
 244
 245     def get_info_extractor(self, ie_key):
 246         """
 247         Get an instance of an IE with name ie_key, it will try to get one from
 248         the _ies list, if there's no instance it will create a new one and add
 249         it to the extractor list.
 250         """
 251         ie = self._ies_instances.get(ie_key)
 252         if ie is None:
 253             ie = get_info_extractor(ie_key)()
 254             self.add_info_extractor(ie)
 255         return ie
 256
 257     def add_default_info_extractors(self):
 258         """
 259         Add the InfoExtractors returned by gen_extractors to the end of the list
 260         """
 261         for ie in gen_extractors():
 262             self.add_info_extractor(ie)
 263
 264     def add_post_processor(self, pp):
 265         """Add a PostProcessor object to the end of the chain."""
 266         self._pps.append(pp)
 267         pp.set_downloader(self)
 268
 269     def add_progress_hook(self, ph):
 270         """Add the progress hook (currently only for the file downloader)"""
 271         self._progress_hooks.append(ph)
 272
 273     def _bidi_workaround(self, message):
 274         if not hasattr(self, '_output_channel'):
 275             return message
 276
 277         assert hasattr(self, '_output_process')
 278         assert isinstance(message, compat_str)
 279         line_count = message.count('\n') + 1
 280         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 281         self._output_process.stdin.flush()
 282         res = ''.join(self._output_channel.readline().decode('utf-8')
 283                        for _ in range(line_count))
 284         return res[:-len('\n')]
 285
 286     def to_screen(self, message, skip_eol=False):
 287         """Print message to stdout if not in quiet mode."""
 288         return self.to_stdout(message, skip_eol, check_quiet=True)
 289
 290     def _write_string(self, s, out=None):
 291         write_string(s, out=out, encoding=self.params.get('encoding'))
 292
 293     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 294         """Print message to stdout if not in quiet mode."""
 295         if self.params.get('logger'):
 296             self.params['logger'].debug(message)
 297         elif not check_quiet or not self.params.get('quiet', False):
 298             message = self._bidi_workaround(message)
 299             terminator = ['\n', ''][skip_eol]
 300             output = message + terminator
 301
 302             self._write_string(output, self._screen_file)
 303
 304     def to_stderr(self, message):
 305         """Print message to stderr."""
 306         assert isinstance(message, compat_str)
 307         if self.params.get('logger'):
 308             self.params['logger'].error(message)
 309         else:
 310             message = self._bidi_workaround(message)
 311             output = message + '\n'
 312             self._write_string(output, self._err_file)
 313
 314     def to_console_title(self, message):
 315         if not self.params.get('consoletitle', False):
 316             return
 317         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 318             # c_wchar_p() might not be necessary if `message` is
 319             # already of type unicode()
 320             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 321         elif 'TERM' in os.environ:
 322             self._write_string('\033]0;%s\007' % message, self._screen_file)
 323
 324     def save_console_title(self):
 325         if not self.params.get('consoletitle', False):
 326             return
 327         if 'TERM' in os.environ:
 328             # Save the title on stack
 329             self._write_string('\033[22;0t', self._screen_file)
 330
 331     def restore_console_title(self):
 332         if not self.params.get('consoletitle', False):
 333             return
 334         if 'TERM' in os.environ:
 335             # Restore the title from stack
 336             self._write_string('\033[23;0t', self._screen_file)
 337
 338     def __enter__(self):
 339         self.save_console_title()
 340         return self
 341
 342     def __exit__(self, *args):
 343         self.restore_console_title()
 344
 345         if self.params.get('cookiefile') is not None:
 346             self.cookiejar.save()
 347
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        message, if not None, is printed to stderr first.
        tb, if given, is additional traceback information; it is only
        printed when the 'verbose' option is set.

        Raises DownloadError unless the 'ignoreerrors' option is set, in
        which case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # An exception carrying its own exc_info attribute wraps
                    # another error: print that inner traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # No exception is being handled: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exc_info of the current exception, if any
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are ignored: remember the failure in the return code
        self._download_retcode = 1
 377
 378     def report_warning(self, message):
 379         '''
 380         Print the message to stderr, it will be prefixed with 'WARNING:'
 381         If stderr is a tty file the 'WARNING:' will be colored
 382         '''
 383         if self.params.get('logger') is not None:
 384             self.params['logger'].warning(message)
 385         else:
 386             if self.params.get('no_warnings'):
 387                 return
 388             if self._err_file.isatty() and os.name != 'nt':
 389                 _msg_header = '\033[0;33mWARNING:\033[0m'
 390             else:
 391                 _msg_header = 'WARNING:'
 392             warning_message = '%s %s' % (_msg_header, message)
 393             self.to_stderr(warning_message)
 394
 395     def report_error(self, message, tb=None):
 396         '''
 397         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 398         in red if stderr is a tty file.
 399         '''
 400         if self._err_file.isatty() and os.name != 'nt':
 401             _msg_header = '\033[0;31mERROR:\033[0m'
 402         else:
 403             _msg_header = 'ERROR:'
 404         error_message = '%s %s' % (_msg_header, message)
 405         self.trouble(error_message, tb)
 406
 407     def report_file_already_downloaded(self, file_name):
 408         """Report file has already been fully downloaded."""
 409         try:
 410             self.to_screen('[download] %s has already been downloaded' % file_name)
 411         except UnicodeEncodeError:
 412             self.to_screen('[download] The file has already been downloaded')
 413
 414     def prepare_filename(self, info_dict):
 415         """Generate the output filename."""
 416         try:
 417             template_dict = dict(info_dict)
 418
 419             template_dict['epoch'] = int(time.time())
 420             autonumber_size = self.params.get('autonumber_size')
 421             if autonumber_size is None:
 422                 autonumber_size = 5
 423             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 424             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 425             if template_dict.get('playlist_index') is not None:
 426                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
 427             if template_dict.get('resolution') is None:
 428                 if template_dict.get('width') and template_dict.get('height'):
 429                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 430                 elif template_dict.get('height'):
 431                     template_dict['resolution'] = '%sp' % template_dict['height']
 432                 elif template_dict.get('width'):
 433                     template_dict['resolution'] = '?x%d' % template_dict['width']
 434
 435             sanitize = lambda k, v: sanitize_filename(
 436                 compat_str(v),
 437                 restricted=self.params.get('restrictfilenames'),
 438                 is_id=(k == 'id'))
 439             template_dict = dict((k, sanitize(k, v))
 440                                  for k, v in template_dict.items()
 441                                  if v is not None)
 442             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 443
 444             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 445             tmpl = os.path.expanduser(outtmpl)
 446             filename = tmpl % template_dict
 447             return filename
 448         except ValueError as err:
 449             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 450             return None
 451
 452     def _match_entry(self, info_dict):
 453         """ Returns None iff the file should be downloaded """
 454
 455         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 456         if 'title' in info_dict:
 457             # This can happen when we're just evaluating the playlist
 458             title = info_dict['title']
 459             matchtitle = self.params.get('matchtitle', False)
 460             if matchtitle:
 461                 if not re.search(matchtitle, title, re.IGNORECASE):
 462                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 463             rejecttitle = self.params.get('rejecttitle', False)
 464             if rejecttitle:
 465                 if re.search(rejecttitle, title, re.IGNORECASE):
 466                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 467         date = info_dict.get('upload_date', None)
 468         if date is not None:
 469             dateRange = self.params.get('daterange', DateRange())
 470             if date not in dateRange:
 471                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 472         view_count = info_dict.get('view_count', None)
 473         if view_count is not None:
 474             min_views = self.params.get('min_views')
 475             if min_views is not None and view_count < min_views:
 476                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 477             max_views = self.params.get('max_views')
 478             if max_views is not None and view_count > max_views:
 479                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 480         age_limit = self.params.get('age_limit')
 481         if age_limit is not None:
 482             if age_limit < info_dict.get('age_limit', 0):
 483                 return 'Skipping "' + title + '" because it is age restricted'
 484         if self.in_download_archive(info_dict):
 485             return '%s has already been recorded in archive' % video_title
 486         return None
 487
 488     @staticmethod
 489     def add_extra_info(info_dict, extra_info):
 490         '''Set the keys from extra_info in info dict if they are missing'''
 491         for key, value in extra_info.items():
 492             info_dict.setdefault(key, value)
 493
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Extract the information for url with the first suitable extractor.

        If ie_key is given, only the extractor returned by
        get_info_extractor(ie_key) is tried; otherwise the registered
        extractors are tried in order.
        If 'process', the extractor result is passed to process_ie_result
        (which also downloads the videos if 'download') and that value is
        returned; otherwise the unprocessed extractor result is returned.
        extra_info is a dict containing the extra values to add to each result.
        Returns None when the extractor returned None, when an error was
        reported without raising, or when no extractor is suitable for url.
        '''

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Never swallowed, even when errors are ignored
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop ended without break/return: no extractor accepted url
            self.report_error('no suitable InfoExtractor for URL %s' % url)
 543
 544     def add_default_extra_info(self, ie_result, ie, url):
 545         self.add_extra_info(ie_result, {
 546             'extractor': ie.IE_NAME,
 547             'webpage_url': url,
 548             'webpage_url_basename': url_basename(url),
 549             'extractor_key': ie.ie_key(),
 550         })
 551
 552     def process_ie_result(self, ie_result, download=True, extra_info={}):
 553         """
 554         Take the result of the ie(may be modified) and resolve all unresolved
 555         references (URLs, playlist items).
 556
 557         It will also download the videos if 'download'.
 558         Returns the resolved ie_result.
 559         """
 560
 561         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 562         if result_type == 'video':
 563             self.add_extra_info(ie_result, extra_info)
 564             return self.process_video_result(ie_result, download=download)
 565         elif result_type == 'url':
 566             # We have to add extra_info to the results because it may be
 567             # contained in a playlist
 568             return self.extract_info(ie_result['url'],
 569                                      download,
 570                                      ie_key=ie_result.get('ie_key'),
 571                                      extra_info=extra_info)
 572         elif result_type == 'url_transparent':
 573             # Use the information from the embedding page
 574             info = self.extract_info(
 575                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 576                 extra_info=extra_info, download=False, process=False)
 577
 578             def make_result(embedded_info):
 579                 new_result = ie_result.copy()
 580                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 581                           'entries', 'ie_key', 'duration',
 582                           'subtitles', 'annotations', 'format',
 583                           'thumbnail', 'thumbnails'):
 584                     if f in new_result:
 585                         del new_result[f]
 586                     if f in embedded_info:
 587                         new_result[f] = embedded_info[f]
 588                 return new_result
 589             new_result = make_result(info)
 590
 591             assert new_result.get('_type') != 'url_transparent'
 592             if new_result.get('_type') == 'compat_list':
 593                 new_result['entries'] = [
 594                     make_result(e) for e in new_result['entries']]
 595
 596             return self.process_ie_result(
 597                 new_result, download=download, extra_info=extra_info)
 598         elif result_type == 'playlist':
 599             # We process each entry in the playlist
 600             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 601             self.to_screen('[download] Downloading playlist: %s' % playlist)
 602
 603             playlist_results = []
 604
 605             playliststart = self.params.get('playliststart', 1) - 1
 606             playlistend = self.params.get('playlistend', None)
 607             # For backwards compatibility, interpret -1 as whole list
 608             if playlistend == -1:
 609                 playlistend = None
 610
 611             if isinstance(ie_result['entries'], list):
 612                 n_all_entries = len(ie_result['entries'])
 613                 entries = ie_result['entries'][playliststart:playlistend]
 614                 n_entries = len(entries)
 615                 self.to_screen(
 616                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 617                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 618             else:
 619                 assert isinstance(ie_result['entries'], PagedList)
 620                 entries = ie_result['entries'].getslice(
 621                     playliststart, playlistend)
 622                 n_entries = len(entries)
 623                 self.to_screen(
 624                     "[%s] playlist %s: Downloading %d videos" %
 625                     (ie_result['extractor'], playlist, n_entries))
 626
 627             for i, entry in enumerate(entries, 1):
 628                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 629                 extra = {
 630                     'playlist': playlist,
 631                     'playlist_index': i + playliststart,
 632                     'extractor': ie_result['extractor'],
 633                     'webpage_url': ie_result['webpage_url'],
 634                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 635                     'extractor_key': ie_result['extractor_key'],
 636                 }
 637
 638                 reason = self._match_entry(entry)
 639                 if reason is not None:
 640                     self.to_screen('[download] ' + reason)
 641                     continue
 642
 643                 entry_result = self.process_ie_result(entry,
 644                                                       download=download,
 645                                                       extra_info=extra)
 646                 playlist_results.append(entry_result)
 647             ie_result['entries'] = playlist_results
 648             return ie_result
 649         elif result_type == 'compat_list':
 650             def _fixup(r):
 651                 self.add_extra_info(r,
 652                     {
 653                         'extractor': ie_result['extractor'],
 654                         'webpage_url': ie_result['webpage_url'],
 655                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 656                         'extractor_key': ie_result['extractor_key'],
 657                     })
 658                 return r
 659             ie_result['entries'] = [
 660                 self.process_ie_result(_fixup(r), download, extra_info)
 661                 for r in ie_result['entries']
 662             ]
 663             return ie_result
 664         else:
 665             raise Exception('Invalid result type: %s' % result_type)
 666
 667     def select_format(self, format_spec, available_formats):
 668         if format_spec == 'best' or format_spec is None:
 669             return available_formats[-1]
 670         elif format_spec == 'worst':
 671             return available_formats[0]
 672         elif format_spec == 'bestaudio':
 673             audio_formats = [
 674                 f for f in available_formats
 675                 if f.get('vcodec') == 'none']
 676             if audio_formats:
 677                 return audio_formats[-1]
 678         elif format_spec == 'worstaudio':
 679             audio_formats = [
 680                 f for f in available_formats
 681                 if f.get('vcodec') == 'none']
 682             if audio_formats:
 683                 return audio_formats[0]
 684         elif format_spec == 'bestvideo':
 685             video_formats = [
 686                 f for f in available_formats
 687                 if f.get('acodec') == 'none']
 688             if video_formats:
 689                 return video_formats[-1]
 690         elif format_spec == 'worstvideo':
 691             video_formats = [
 692                 f for f in available_formats
 693                 if f.get('acodec') == 'none']
 694             if video_formats:
 695                 return video_formats[0]
 696         else:
 697             extensions = ['mp4', 'flv', 'webm', '3gp']
 698             if format_spec in extensions:
 699                 filter_f = lambda f: f['ext'] == format_spec
 700             else:
 701                 filter_f = lambda f: f['format_id'] == format_spec
 702             matches = list(filter(filter_f, available_formats))
 703             if matches:
 704                 return matches[-1]
 705         return None
 706
 707     def process_video_result(self, info_dict, download=True):
 708         assert info_dict.get('_type', 'video') == 'video'
 709
 710         if 'id' not in info_dict:
 711             raise ExtractorError('Missing "id" field in extractor result')
 712         if 'title' not in info_dict:
 713             raise ExtractorError('Missing "title" field in extractor result')
 714
 715         if 'playlist' not in info_dict:
 716             # It isn't part of a playlist
 717             info_dict['playlist'] = None
 718             info_dict['playlist_index'] = None
 719
 720         thumbnails = info_dict.get('thumbnails')
 721         if thumbnails:
 722             thumbnails.sort(key=lambda t: (
 723                 t.get('width'), t.get('height'), t.get('url')))
 724             for t in thumbnails:
 725                 if 'width' in t and 'height' in t:
 726                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 727
 728         if thumbnails and 'thumbnail' not in info_dict:
 729             info_dict['thumbnail'] = thumbnails[-1]['url']
 730
 731         if 'display_id' not in info_dict and 'id' in info_dict:
 732             info_dict['display_id'] = info_dict['id']
 733
 734         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 735             upload_date = datetime.datetime.utcfromtimestamp(
 736                 info_dict['timestamp'])
 737             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 738
 739         # This extractors handle format selection themselves
 740         if info_dict['extractor'] in ['Youku']:
 741             if download:
 742                 self.process_info(info_dict)
 743             return info_dict
 744
 745         # We now pick which formats have to be downloaded
 746         if info_dict.get('formats') is None:
 747             # There's only one format available
 748             formats = [info_dict]
 749         else:
 750             formats = info_dict['formats']
 751
 752         if not formats:
 753             raise ExtractorError('No video formats found!')
 754
 755         # We check that all the formats have the format and format_id fields
 756         for i, format in enumerate(formats):
 757             if 'url' not in format:
 758                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 759
 760             if format.get('format_id') is None:
 761                 format['format_id'] = compat_str(i)
 762             if format.get('format') is None:
 763                 format['format'] = '{id} - {res}{note}'.format(
 764                     id=format['format_id'],
 765                     res=self.format_resolution(format),
 766                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 767                 )
 768             # Automatically determine file extension if missing
 769             if 'ext' not in format:
 770                 format['ext'] = determine_ext(format['url']).lower()
 771
 772         format_limit = self.params.get('format_limit', None)
 773         if format_limit:
 774             formats = list(takewhile_inclusive(
 775                 lambda f: f['format_id'] != format_limit, formats
 776             ))
 777
 778         # TODO Central sorting goes here
 779
 780         if formats[0] is not info_dict:
 781             # only set the 'formats' fields if the original info_dict list them
 782             # otherwise we end up with a circular reference, the first (and unique)
 783             # element in the 'formats' field in info_dict is info_dict itself,
 784             # wich can't be exported to json
 785             info_dict['formats'] = formats
 786         if self.params.get('listformats', None):
 787             self.list_formats(info_dict)
 788             return
 789
 790         req_format = self.params.get('format')
 791         if req_format is None:
 792             req_format = 'best'
 793         formats_to_download = []
 794         # The -1 is for supporting YoutubeIE
 795         if req_format in ('-1', 'all'):
 796             formats_to_download = formats
 797         else:
 798             # We can accept formats requested in the format: 34/5/best, we pick
 799             # the first that is available, starting from left
 800             req_formats = req_format.split('/')
 801             for rf in req_formats:
 802                 if re.match(r'.+?\+.+?', rf) is not None:
 803                     # Two formats have been requested like '137+139'
 804                     format_1, format_2 = rf.split('+')
 805                     formats_info = (self.select_format(format_1, formats),
 806                         self.select_format(format_2, formats))
 807                     if all(formats_info):
 808                         selected_format = {
 809                             'requested_formats': formats_info,
 810                             'format': rf,
 811                             'ext': formats_info[0]['ext'],
 812                         }
 813                     else:
 814                         selected_format = None
 815                 else:
 816                     selected_format = self.select_format(rf, formats)
 817                 if selected_format is not None:
 818                     formats_to_download = [selected_format]
 819                     break
 820         if not formats_to_download:
 821             raise ExtractorError('requested format not available',
 822                                  expected=True)
 823
 824         if download:
 825             if len(formats_to_download) > 1:
 826                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 827             for format in formats_to_download:
 828                 new_info = dict(info_dict)
 829                 new_info.update(format)
 830                 self.process_info(new_info)
 831         # We update the info dict with the best quality format (backwards compatibility)
 832         info_dict.update(formats_to_download[-1])
 833         return info_dict
 834
 835     def process_info(self, info_dict):
 836         """Process a single resolved IE result."""
 837
 838         assert info_dict.get('_type', 'video') == 'video'
 839
 840         max_downloads = self.params.get('max_downloads')
 841         if max_downloads is not None:
 842             if self._num_downloads >= int(max_downloads):
 843                 raise MaxDownloadsReached()
 844
 845         info_dict['fulltitle'] = info_dict['title']
 846         if len(info_dict['title']) > 200:
 847             info_dict['title'] = info_dict['title'][:197] + '...'
 848
 849         # Keep for backwards compatibility
 850         info_dict['stitle'] = info_dict['title']
 851
 852         if 'format' not in info_dict:
 853             info_dict['format'] = info_dict['ext']
 854
 855         reason = self._match_entry(info_dict)
 856         if reason is not None:
 857             self.to_screen('[download] ' + reason)
 858             return
 859
 860         self._num_downloads += 1
 861
 862         filename = self.prepare_filename(info_dict)
 863
 864         # Forced printings
 865         if self.params.get('forcetitle', False):
 866             self.to_stdout(info_dict['fulltitle'])
 867         if self.params.get('forceid', False):
 868             self.to_stdout(info_dict['id'])
 869         if self.params.get('forceurl', False):
 870             # For RTMP URLs, also include the playpath
 871             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 872         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 873             self.to_stdout(info_dict['thumbnail'])
 874         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 875             self.to_stdout(info_dict['description'])
 876         if self.params.get('forcefilename', False) and filename is not None:
 877             self.to_stdout(filename)
 878         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 879             self.to_stdout(formatSeconds(info_dict['duration']))
 880         if self.params.get('forceformat', False):
 881             self.to_stdout(info_dict['format'])
 882         if self.params.get('forcejson', False):
 883             info_dict['_filename'] = filename
 884             self.to_stdout(json.dumps(info_dict))
 885
 886         # Do nothing else if in simulate mode
 887         if self.params.get('simulate', False):
 888             return
 889
 890         if filename is None:
 891             return
 892
 893         try:
 894             dn = os.path.dirname(encodeFilename(filename))
 895             if dn and not os.path.exists(dn):
 896                 os.makedirs(dn)
 897         except (OSError, IOError) as err:
 898             self.report_error('unable to create directory ' + compat_str(err))
 899             return
 900
 901         if self.params.get('writedescription', False):
 902             descfn = filename + '.description'
 903             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 904                 self.to_screen('[info] Video description is already present')
 905             else:
 906                 try:
 907                     self.to_screen('[info] Writing video description to: ' + descfn)
 908                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 909                         descfile.write(info_dict['description'])
 910                 except (KeyError, TypeError):
 911                     self.report_warning('There\'s no description to write.')
 912                 except (OSError, IOError):
 913                     self.report_error('Cannot write description file ' + descfn)
 914                     return
 915
 916         if self.params.get('writeannotations', False):
 917             annofn = filename + '.annotations.xml'
 918             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 919                 self.to_screen('[info] Video annotations are already present')
 920             else:
 921                 try:
 922                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 923                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 924                         annofile.write(info_dict['annotations'])
 925                 except (KeyError, TypeError):
 926                     self.report_warning('There are no annotations to write.')
 927                 except (OSError, IOError):
 928                     self.report_error('Cannot write annotations file: ' + annofn)
 929                     return
 930
 931         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 932                                        self.params.get('writeautomaticsub')])
 933
 934         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 935             # subtitles download errors are already managed as troubles in relevant IE
 936             # that way it will silently go on when used with unsupporting IE
 937             subtitles = info_dict['subtitles']
 938             sub_format = self.params.get('subtitlesformat', 'srt')
 939             for sub_lang in subtitles.keys():
 940                 sub = subtitles[sub_lang]
 941                 if sub is None:
 942                     continue
 943                 try:
 944                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 945                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 946                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 947                     else:
 948                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 949                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 950                                 subfile.write(sub)
 951                 except (OSError, IOError):
 952                     self.report_error('Cannot write subtitles file ' + sub_filename)
 953                     return
 954
 955         if self.params.get('writeinfojson', False):
 956             infofn = os.path.splitext(filename)[0] + '.info.json'
 957             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 958                 self.to_screen('[info] Video description metadata is already present')
 959             else:
 960                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 961                 try:
 962                     write_json_file(info_dict, encodeFilename(infofn))
 963                 except (OSError, IOError):
 964                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 965                     return
 966
 967         if self.params.get('writethumbnail', False):
 968             if info_dict.get('thumbnail') is not None:
 969                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 970                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 971                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 972                     self.to_screen('[%s] %s: Thumbnail is already present' %
 973                                    (info_dict['extractor'], info_dict['id']))
 974                 else:
 975                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 976                                    (info_dict['extractor'], info_dict['id']))
 977                     try:
 978                         uf = self.urlopen(info_dict['thumbnail'])
 979                         with open(thumb_filename, 'wb') as thumbf:
 980                             shutil.copyfileobj(uf, thumbf)
 981                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
 982                             (info_dict['extractor'], info_dict['id'], thumb_filename))
 983                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 984                         self.report_warning('Unable to download thumbnail "%s": %s' %
 985                             (info_dict['thumbnail'], compat_str(err)))
 986
 987         if not self.params.get('skip_download', False):
 988             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 989                 success = True
 990             else:
 991                 try:
 992                     def dl(name, info):
 993                         fd = get_suitable_downloader(info)(self, self.params)
 994                         for ph in self._progress_hooks:
 995                             fd.add_progress_hook(ph)
 996                         if self.params.get('verbose'):
 997                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
 998                         return fd.download(name, info)
 999                     if info_dict.get('requested_formats') is not None:
1000                         downloaded = []
1001                         success = True
1002                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1003                         if not merger._get_executable():
1004                             postprocessors = []
1005                             self.report_warning('You have requested multiple '
1006                                 'formats but ffmpeg or avconv are not installed.'
1007                                 ' The formats won\'t be merged')
1008                         else:
1009                             postprocessors = [merger]
1010                         for f in info_dict['requested_formats']:
1011                             new_info = dict(info_dict)
1012                             new_info.update(f)
1013                             fname = self.prepare_filename(new_info)
1014                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1015                             downloaded.append(fname)
1016                             partial_success = dl(fname, new_info)
1017                             success = success and partial_success
1018                         info_dict['__postprocessors'] = postprocessors
1019                         info_dict['__files_to_merge'] = downloaded
1020                     else:
1021                         # Just a single file
1022                         success = dl(filename, info_dict)
1023                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1024                     self.report_error('unable to download video data: %s' % str(err))
1025                     return
1026                 except (OSError, IOError) as err:
1027                     raise UnavailableVideoError(err)
1028                 except (ContentTooShortError, ) as err:
1029                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1030                     return
1031
1032             if success:
1033                 try:
1034                     self.post_process(filename, info_dict)
1035                 except (PostProcessingError) as err:
1036                     self.report_error('postprocessing: %s' % str(err))
1037                     return
1038
1039         self.record_download_archive(info_dict)
1040
1041     def download(self, url_list):
1042         """Download a given list of URLs."""
1043         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1044         if (len(url_list) > 1 and
1045                 '%' not in outtmpl
1046                 and self.params.get('max_downloads') != 1):
1047             raise SameFileError(outtmpl)
1048
1049         for url in url_list:
1050             try:
1051                 #It also downloads the videos
1052                 self.extract_info(url)
1053             except UnavailableVideoError:
1054                 self.report_error('unable to download video')
1055             except MaxDownloadsReached:
1056                 self.to_screen('[info] Maximum number of downloaded files reached.')
1057                 raise
1058
1059         return self._download_retcode
1060
1061     def download_with_info_file(self, info_filename):
1062         with io.open(info_filename, 'r', encoding='utf-8') as f:
1063             info = json.load(f)
1064         try:
1065             self.process_ie_result(info, download=True)
1066         except DownloadError:
1067             webpage_url = info.get('webpage_url')
1068             if webpage_url is not None:
1069                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1070                 return self.download([webpage_url])
1071             else:
1072                 raise
1073         return self._download_retcode
1074
1075     def post_process(self, filename, ie_info):
1076         """Run all the postprocessors on the given file."""
1077         info = dict(ie_info)
1078         info['filepath'] = filename
1079         keep_video = None
1080         pps_chain = []
1081         if ie_info.get('__postprocessors') is not None:
1082             pps_chain.extend(ie_info['__postprocessors'])
1083         pps_chain.extend(self._pps)
1084         for pp in pps_chain:
1085             try:
1086                 keep_video_wish, new_info = pp.run(info)
1087                 if keep_video_wish is not None:
1088                     if keep_video_wish:
1089                         keep_video = keep_video_wish
1090                     elif keep_video is None:
1091                         # No clear decision yet, let IE decide
1092                         keep_video = keep_video_wish
1093             except PostProcessingError as e:
1094                 self.report_error(e.msg)
1095         if keep_video is False and not self.params.get('keepvideo', False):
1096             try:
1097                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1098                 os.remove(encodeFilename(filename))
1099             except (IOError, OSError):
1100                 self.report_warning('Unable to remove downloaded video file')
1101
1102     def _make_archive_id(self, info_dict):
1103         # Future-proof against any change in case
1104         # and backwards compatibility with prior versions
1105         extractor = info_dict.get('extractor_key')
1106         if extractor is None:
1107             if 'id' in info_dict:
1108                 extractor = info_dict.get('ie_key')  # key in a playlist
1109         if extractor is None:
1110             return None  # Incomplete video information
1111         return extractor.lower() + ' ' + info_dict['id']
1112
1113     def in_download_archive(self, info_dict):
1114         fn = self.params.get('download_archive')
1115         if fn is None:
1116             return False
1117
1118         vid_id = self._make_archive_id(info_dict)
1119         if vid_id is None:
1120             return False  # Incomplete video information
1121
1122         try:
1123             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1124                 for line in archive_file:
1125                     if line.strip() == vid_id:
1126                         return True
1127         except IOError as ioe:
1128             if ioe.errno != errno.ENOENT:
1129                 raise
1130         return False
1131
1132     def record_download_archive(self, info_dict):
1133         fn = self.params.get('download_archive')
1134         if fn is None:
1135             return
1136         vid_id = self._make_archive_id(info_dict)
1137         assert vid_id
1138         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1139             archive_file.write(vid_id + '\n')
1140
1141     @staticmethod
1142     def format_resolution(format, default='unknown'):
1143         if format.get('vcodec') == 'none':
1144             return 'audio only'
1145         if format.get('resolution') is not None:
1146             return format['resolution']
1147         if format.get('height') is not None:
1148             if format.get('width') is not None:
1149                 res = '%sx%s' % (format['width'], format['height'])
1150             else:
1151                 res = '%sp' % format['height']
1152         elif format.get('width') is not None:
1153             res = '?x%d' % format['width']
1154         else:
1155             res = default
1156         return res
1157
1158     def _format_note(self, fdict):
1159         res = ''
1160         if fdict.get('ext') in ['f4f', 'f4m']:
1161             res += '(unsupported) '
1162         if fdict.get('format_note') is not None:
1163             res += fdict['format_note'] + ' '
1164         if fdict.get('tbr') is not None:
1165             res += '%4dk ' % fdict['tbr']
1166         if fdict.get('container') is not None:
1167             if res:
1168                 res += ', '
1169             res += '%s container' % fdict['container']
1170         if (fdict.get('vcodec') is not None and
1171                 fdict.get('vcodec') != 'none'):
1172             if res:
1173                 res += ', '
1174             res += fdict['vcodec']
1175             if fdict.get('vbr') is not None:
1176                 res += '@'
1177         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1178             res += 'video@'
1179         if fdict.get('vbr') is not None:
1180             res += '%4dk' % fdict['vbr']
1181         if fdict.get('acodec') is not None:
1182             if res:
1183                 res += ', '
1184             if fdict['acodec'] == 'none':
1185                 res += 'video only'
1186             else:
1187                 res += '%-5s' % fdict['acodec']
1188         elif fdict.get('abr') is not None:
1189             if res:
1190                 res += ', '
1191             res += 'audio'
1192         if fdict.get('abr') is not None:
1193             res += '@%3dk' % fdict['abr']
1194         if fdict.get('asr') is not None:
1195             res += ' (%5dHz)' % fdict['asr']
1196         if fdict.get('filesize') is not None:
1197             if res:
1198                 res += ', '
1199             res += format_bytes(fdict['filesize'])
1200         elif fdict.get('filesize_approx') is not None:
1201             if res:
1202                 res += ', '
1203             res += '~' + format_bytes(fdict['filesize_approx'])
1204         return res
1205
1206     def list_formats(self, info_dict):
1207         def line(format, idlen=20):
1208             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1209                 format['format_id'],
1210                 format['ext'],
1211                 self.format_resolution(format),
1212                 self._format_note(format),
1213             ))
1214
1215         formats = info_dict.get('formats', [info_dict])
1216         idlen = max(len('format code'),
1217                     max(len(f['format_id']) for f in formats))
1218         formats_s = [line(f, idlen) for f in formats]
1219         if len(formats) > 1:
1220             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1221             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1222
1223         header_line = line({
1224             'format_id': 'format code', 'ext': 'extension',
1225             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1226         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1227                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1228
1229     def urlopen(self, req):
1230         """ Start an HTTP download """
1231         return self._opener.open(req, timeout=self._socket_timeout)
1232
1233     def print_debug_header(self):
1234         if not self.params.get('verbose'):
1235             return
1236
1237         if type('') is not compat_str:
1238             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1239             self.report_warning(
1240                 'Your Python is broken! Update to a newer and supported version')
1241
1242         encoding_str = (
1243             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1244                 locale.getpreferredencoding(),
1245                 sys.getfilesystemencoding(),
1246                 sys.stdout.encoding,
1247                 self.get_encoding()))
1248         write_string(encoding_str, encoding=None)
1249
1250         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1251         try:
1252             sp = subprocess.Popen(
1253                 ['git', 'rev-parse', '--short', 'HEAD'],
1254                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1255                 cwd=os.path.dirname(os.path.abspath(__file__)))
1256             out, err = sp.communicate()
1257             out = out.decode().strip()
1258             if re.match('[0-9a-f]+', out):
1259                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1260         except:
1261             try:
1262                 sys.exc_clear()
1263             except:
1264                 pass
1265         self._write_string('[debug] Python version %s - %s' %
1266                      (platform.python_version(), platform_name()) + '\n')
1267
1268         proxy_map = {}
1269         for handler in self._opener.handlers:
1270             if hasattr(handler, 'proxies'):
1271                 proxy_map.update(handler.proxies)
1272         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1273
1274     def _setup_opener(self):
1275         timeout_val = self.params.get('socket_timeout')
1276         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1277
1278         opts_cookiefile = self.params.get('cookiefile')
1279         opts_proxy = self.params.get('proxy')
1280
1281         if opts_cookiefile is None:
1282             self.cookiejar = compat_cookiejar.CookieJar()
1283         else:
1284             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1285                 opts_cookiefile)
1286             if os.access(opts_cookiefile, os.R_OK):
1287                 self.cookiejar.load()
1288
1289         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1290             self.cookiejar)
1291         if opts_proxy is not None:
1292             if opts_proxy == '':
1293                 proxies = {}
1294             else:
1295                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1296         else:
1297             proxies = compat_urllib_request.getproxies()
1298             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1299             if 'http' in proxies and 'https' not in proxies:
1300                 proxies['https'] = proxies['http']
1301         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1302
1303         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1304         https_handler = make_HTTPS_handler(
1305             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1306         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1307         opener = compat_urllib_request.build_opener(
1308             https_handler, proxy_handler, cookie_processor, ydlh)
1309         # Delete the default user-agent header, which would otherwise apply in
1310         # cases where our custom HTTP handler doesn't come into play
1311         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1312         opener.addheaders = []
1313         self._opener = opener
1314
1315     def encode(self, s):
1316         if isinstance(s, bytes):
1317             return s  # Already encoded
1318
1319         try:
1320             return s.encode(self.get_encoding())
1321         except UnicodeEncodeError as err:
1322             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1323             raise
1324
1325     def get_encoding(self):
1326         encoding = self.params.get('encoding')
1327         if encoding is None:
1328             encoding = preferredencoding()
1329         return encoding