4 from __future__ 
import absolute_import
, unicode_literals
 
  29 from string 
import ascii_letters
 
  34     compat_get_terminal_size
, 
  40     compat_tokenize_tokenize
, 
  42     compat_urllib_request
, 
  43     compat_urllib_request_DataHandler
, 
  71     PerRequestProxyHandler
, 
  76     register_socks_protocols
, 
  87     UnavailableVideoError
, 
  93     YoutubeDLCookieProcessor
, 
  96 from .cache 
import Cache
 
  97 from .extractor 
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
 
  98 from .extractor
.openload 
import PhantomJSwrapper
 
  99 from .downloader 
import get_suitable_downloader
 
 100 from .downloader
.rtmp 
import rtmpdump_version
 
 101 from .postprocessor 
import ( 
 104     FFmpegFixupStretchedPP
, 
 109 from .version 
import __version__
 
 111 if compat_os_name 
== 'nt': 
 115 class YoutubeDL(object): 
 118     YoutubeDL objects are the ones responsible of downloading the 
 119     actual video file and writing it to disk if the user has requested 
 120     it, among some other tasks. In most cases there should be one per 
 121     program. As, given a video URL, the downloader doesn't know how to 
 122     extract all the needed information, task that InfoExtractors do, it 
 123     has to pass the URL to one of them. 
 125     For this, YoutubeDL objects have a method that allows 
 126     InfoExtractors to be registered in a given order. When it is passed 
 127     a URL, the YoutubeDL object hands it to the first InfoExtractor it 
 128     finds that reports being able to handle it. The InfoExtractor extracts 
 129     all the information about the video or videos the URL refers to, and 
 130     YoutubeDL processes the extracted information, possibly using a File 
 131     Downloader to download the video. 
 133     YoutubeDL objects accept a lot of parameters. In order not to saturate 
 134     the object constructor with arguments, it receives a dictionary of 
 135     options instead. These options are available through the params 
 136     attribute for the InfoExtractors to use. The YoutubeDL also 
 137     registers itself as the downloader in charge for the InfoExtractors 
 138     that are added to it, so this is a "mutual registration". 
 142     username:          Username for authentication purposes. 
 143     password:          Password for authentication purposes. 
 144     videopassword:     Password for accessing a video. 
 145     ap_mso:            Adobe Pass multiple-system operator identifier. 
 146     ap_username:       Multiple-system operator account username. 
 147     ap_password:       Multiple-system operator account password. 
 148     usenetrc:          Use netrc for authentication instead. 
 149     verbose:           Print additional info to stdout. 
 150     quiet:             Do not print messages to stdout. 
 151     no_warnings:       Do not print out anything for warnings. 
 152     forceurl:          Force printing final URL. 
 153     forcetitle:        Force printing title. 
 154     forceid:           Force printing ID. 
 155     forcethumbnail:    Force printing thumbnail URL. 
 156     forcedescription:  Force printing description. 
 157     forcefilename:     Force printing final filename. 
 158     forceduration:     Force printing duration. 
 159     forcejson:         Force printing info_dict as JSON. 
 160     dump_single_json:  Force printing the info_dict of the whole playlist 
 161                        (or video) as a single JSON line. 
 162     simulate:          Do not download the video files. 
 163     format:            Video format code. See options.py for more information. 
 164     outtmpl:           Template for output names. 
 165     restrictfilenames: Do not allow "&" and spaces in file names 
 166     ignoreerrors:      Do not stop on download errors. 
 167     force_generic_extractor: Force downloader to use the generic extractor 
 168     nooverwrites:      Prevent overwriting files. 
 169     playliststart:     Playlist item to start at. 
 170     playlistend:       Playlist item to end at. 
 171     playlist_items:    Specific indices of playlist to download. 
 172     playlistreverse:   Download playlist items in reverse order. 
 173     playlistrandom:    Download playlist items in random order. 
 174     matchtitle:        Download only matching titles. 
 175     rejecttitle:       Reject downloads for matching titles. 
 176     logger:            Log messages to a logging.Logger instance. 
 177     logtostderr:       Log messages to stderr instead of stdout. 
 178     writedescription:  Write the video description to a .description file 
 179     writeinfojson:     Write the video description to a .info.json file 
 180     writeannotations:  Write the video annotations to a .annotations.xml file 
 181     writethumbnail:    Write the thumbnail image to a file 
 182     write_all_thumbnails:  Write all thumbnail formats to files 
 183     writesubtitles:    Write the video subtitles to a file 
 184     writeautomaticsub: Write the automatically generated subtitles to a file 
 185     allsubtitles:      Downloads all the subtitles of the video 
 186                        (requires writesubtitles or writeautomaticsub) 
 187     listsubtitles:     Lists all available subtitles for the video 
 188     subtitlesformat:   The format code for subtitles 
 189     subtitleslangs:    List of languages of the subtitles to download 
 190     keepvideo:         Keep the video file after post-processing 
 191     daterange:         A DateRange object, download only if the upload_date is in the range. 
 192     skip_download:     Skip the actual download of the video file 
 193     cachedir:          Location of the cache files in the filesystem. 
 194                        False to disable filesystem cache. 
 195     noplaylist:        Download single video instead of a playlist if in doubt. 
 196     age_limit:         An integer representing the user's age in years. 
 197                        Unsuitable videos for the given age are skipped. 
 198     min_views:         An integer representing the minimum view count the video 
 199                        must have in order to not be skipped. 
 200                        Videos without view count information are always 
 201                        downloaded. None for no limit. 
 202     max_views:         An integer representing the maximum view count. 
 203                        Videos that are more popular than that are not 
 205                        Videos without view count information are always 
 206                        downloaded. None for no limit. 
 207     download_archive:  File name of a file where all downloads are recorded. 
 208                        Videos already present in the file are not downloaded 
 210     cookiefile:        File name where cookies should be read from and dumped to. 
 211     nocheckcertificate:Do not verify SSL certificates 
 212     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information. 
 213                        At the moment, this is only supported by YouTube. 
 214     proxy:             URL of the proxy server to use 
 215     geo_verification_proxy:  URL of the proxy to use for IP address verification 
 216                        on geo-restricted sites. 
 217     socket_timeout:    Time to wait for unresponsive hosts, in seconds 
 218     bidi_workaround:   Work around buggy terminals without bidirectional text 
 219                        support, using fribidi 
 220     debug_printtraffic:Print out sent and received HTTP traffic 
 221     include_ads:       Download ads as well 
 222     default_search:    Prepend this string if an input url is not valid. 
 223                        'auto' for elaborate guessing 
 224     encoding:          Use this encoding instead of the system-specified. 
 225     extract_flat:      Do not resolve URLs, return the immediate result. 
 226                        Pass in 'in_playlist' to only show this behavior for 
 228     postprocessors:    A list of dictionaries, each with an entry 
 229                        * key:  The name of the postprocessor. See 
 230                                youtube_dl/postprocessor/__init__.py for a list. 
 231                        as well as any further keyword arguments for the 
 233     progress_hooks:    A list of functions that get called on download 
 234                        progress, with a dictionary with the entries 
 235                        * status: One of "downloading", "error", or "finished". 
 236                                  Check this first and ignore unknown values. 
 238                        If status is one of "downloading", or "finished", the 
 239                        following properties may also be present: 
 240                        * filename: The final filename (always present) 
 241                        * tmpfilename: The filename we're currently writing to 
 242                        * downloaded_bytes: Bytes on disk 
 243                        * total_bytes: Size of the whole file, None if unknown 
 244                        * total_bytes_estimate: Guess of the eventual file size, 
 246                        * elapsed: The number of seconds since download started. 
 247                        * eta: The estimated time in seconds, None if unknown 
 248                        * speed: The download speed in bytes/second, None if 
 250                        * fragment_index: The counter of the currently 
 251                                          downloaded video fragment. 
 252                        * fragment_count: The number of fragments (= individual 
 253                                          files that will be merged) 
 255                        Progress hooks are guaranteed to be called at least once 
 256                        (with status "finished") if the download is successful. 
 257     merge_output_format: Extension to use when merging formats. 
 258     fixup:             Automatically correct known faults of the file. 
 260                        - "never": do nothing 
 261                        - "warn": only emit a warning 
 262                        - "detect_or_warn": check whether we can do anything 
 263                                            about it, warn otherwise (default) 
 264     source_address:    Client-side IP address to bind to. 
 265     call_home:         Boolean, true iff we are allowed to contact the 
 266                        youtube-dl servers for debugging. 
 267     sleep_interval:    Number of seconds to sleep before each download when 
 268                        used alone or a lower bound of a range for randomized 
 269                        sleep before each download (minimum possible number 
 270                        of seconds to sleep) when used along with 
 272     max_sleep_interval:Upper bound of a range for randomized sleep before each 
 273                        download (maximum possible number of seconds to sleep). 
 274                        Must only be used along with sleep_interval. 
 275                        Actual sleep time will be a random float from range 
 276                        [sleep_interval; max_sleep_interval]. 
 277     listformats:       Print an overview of available video formats and exit. 
 278     list_thumbnails:   Print a table of all thumbnails and exit. 
 279     match_filter:      A function that gets called with the info_dict of 
 281                        If it returns a message, the video is ignored. 
 282                        If it returns None, the video is downloaded. 
 283                        match_filter_func in utils.py is one example for this. 
 284     no_color:          Do not emit color codes in output. 
 285     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For 
 288                        Two-letter ISO 3166-2 country code that will be used for 
 289                        explicit geographic restriction bypassing via faking 
 290                        X-Forwarded-For HTTP header 
 292                        IP range in CIDR notation that will be used similarly to 
 295     The following options determine which downloader is picked: 
 296     external_downloader: Executable of the external downloader to call. 
 297                        None or unset for standard (built-in) downloader. 
 298     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv 
 299                        if True, otherwise use ffmpeg/avconv if False, otherwise 
 300                        use downloader suggested by extractor if None. 
 302     The following parameters are not used by YoutubeDL itself, they are used by 
 303     the downloader (see youtube_dl/downloader/common.py): 
 304     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, 
 305     noresizebuffer, retries, continuedl, noprogress, consoletitle, 
 306     xattr_set_filesize, external_downloader_args, hls_use_mpegts, 
 309     The following options are used by the post processors: 
 310     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available, 
 311                        otherwise prefer ffmpeg. 
 312     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path 
 313                        to the binary or its containing directory. 
 314     postprocessor_args: A list of additional command-line arguments for the 
 317     The following options are used by the Youtube extractor: 
 318     youtube_include_dash_manifest: If True (default), DASH manifests and related 
 319                         data will be downloaded and processed by extractor. 
 320                         You can reduce network I/O by disabling it if you don't 
 324     _NUMERIC_FIELDS 
= set(( 
 325         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 
 326         'timestamp', 'upload_year', 'upload_month', 'upload_day', 
 327         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 
 328         'average_rating', 'comment_count', 'age_limit', 
 329         'start_time', 'end_time', 
 330         'chapter_number', 'season_number', 'episode_number', 
 331         'track_number', 'disc_number', 'release_year', 
 338     _download_retcode 
= None 
 339     _num_downloads 
= None 
 342     def __init__(self
, params
=None, auto_init
=True): 
 343         """Create a FileDownloader object with the given options.""" 
 347         self
._ies
_instances 
= {} 
 349         self
._progress
_hooks 
= [] 
 350         self
._download
_retcode 
= 0 
 351         self
._num
_downloads 
= 0 
 352         self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 353         self
._err
_file 
= sys
.stderr
 
 356             'nocheckcertificate': False, 
 358         self
.params
.update(params
) 
 359         self
.cache 
= Cache(self
) 
 361         def check_deprecated(param
, option
, suggestion
): 
 362             if self
.params
.get(param
) is not None: 
 364                     '%s is deprecated. Use %s instead.' % (option
, suggestion
)) 
 368         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): 
 369             if self
.params
.get('geo_verification_proxy') is None: 
 370                 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy'] 
 372         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') 
 373         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') 
 374         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') 
 376         if params
.get('bidi_workaround', False): 
 379                 master
, slave 
= pty
.openpty() 
 380                 width 
= compat_get_terminal_size().columns
 
 384                     width_args 
= ['-w', str(width
)] 
 386                     stdin
=subprocess
.PIPE
, 
 388                     stderr
=self
._err
_file
) 
 390                     self
._output
_process 
= subprocess
.Popen( 
 391                         ['bidiv'] + width_args
, **sp_kwargs
 
 394                     self
._output
_process 
= subprocess
.Popen( 
 395                         ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
) 
 396                 self
._output
_channel 
= os
.fdopen(master
, 'rb') 
 397             except OSError as ose
: 
 398                 if ose
.errno 
== errno
.ENOENT
: 
 399                     self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.') 
 403         if (sys
.platform 
!= 'win32' 
 404                 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] 
 405                 and not params
.get('restrictfilenames', False)): 
 406             # Unicode filesystem API will throw errors (#1474, #13027) 
 408                 'Assuming --restrict-filenames since file system encoding ' 
 409                 'cannot encode all characters. ' 
 410                 'Set the LC_ALL environment variable to fix this.') 
 411             self
.params
['restrictfilenames'] = True 
 413         if isinstance(params
.get('outtmpl'), bytes): 
 415                 'Parameter outtmpl is bytes, but should be a unicode string. ' 
 416                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.') 
 421             self
.print_debug_header() 
 422             self
.add_default_info_extractors() 
 424         for pp_def_raw 
in self
.params
.get('postprocessors', []): 
 425             pp_class 
= get_postprocessor(pp_def_raw
['key']) 
 426             pp_def 
= dict(pp_def_raw
) 
 428             pp 
= pp_class(self
, **compat_kwargs(pp_def
)) 
 429             self
.add_post_processor(pp
) 
 431         for ph 
in self
.params
.get('progress_hooks', []): 
 432             self
.add_progress_hook(ph
) 
 434         register_socks_protocols() 
 436     def warn_if_short_id(self
, argv
): 
 437         # short YouTube ID starting with dash? 
 439             i 
for i
, a 
in enumerate(argv
) 
 440             if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)] 
 444                 + [a 
for i
, a 
in enumerate(argv
) if i 
not in idxs
] 
 445                 + ['--'] + [argv
[i
] for i 
in idxs
] 
 448                 'Long argument string detected. ' 
 449                 'Use -- to separate parameters and URLs, like this:\n%s\n' % 
 450                 args_to_str(correct_argv
)) 
 452     def add_info_extractor(self
, ie
): 
 453         """Add an InfoExtractor object to the end of the list.""" 
 455         if not isinstance(ie
, type): 
 456             self
._ies
_instances
[ie
.ie_key()] = ie
 
 457             ie
.set_downloader(self
) 
 459     def get_info_extractor(self
, ie_key
): 
 461         Get an instance of an IE with name ie_key, it will try to get one from 
 462         the _ies list, if there's no instance it will create a new one and add 
 463         it to the extractor list. 
 465         ie 
= self
._ies
_instances
.get(ie_key
) 
 467             ie 
= get_info_extractor(ie_key
)() 
 468             self
.add_info_extractor(ie
) 
 471     def add_default_info_extractors(self
): 
 473         Add the InfoExtractors returned by gen_extractors to the end of the list 
 475         for ie 
in gen_extractor_classes(): 
 476             self
.add_info_extractor(ie
) 
 478     def add_post_processor(self
, pp
): 
 479         """Add a PostProcessor object to the end of the chain.""" 
 481         pp
.set_downloader(self
) 
 483     def add_progress_hook(self
, ph
): 
 484         """Add the progress hook (currently only for the file downloader)""" 
 485         self
._progress
_hooks
.append(ph
) 
 487     def _bidi_workaround(self
, message
): 
 488         if not hasattr(self
, '_output_channel'): 
 491         assert hasattr(self
, '_output_process') 
 492         assert isinstance(message
, compat_str
) 
 493         line_count 
= message
.count('\n') + 1 
 494         self
._output
_process
.stdin
.write((message 
+ '\n').encode('utf-8')) 
 495         self
._output
_process
.stdin
.flush() 
 496         res 
= ''.join(self
._output
_channel
.readline().decode('utf-8') 
 497                       for _ 
in range(line_count
)) 
 498         return res
[:-len('\n')] 
 500     def to_screen(self
, message
, skip_eol
=False): 
 501         """Print message to stdout if not in quiet mode.""" 
 502         return self
.to_stdout(message
, skip_eol
, check_quiet
=True) 
 504     def _write_string(self
, s
, out
=None): 
 505         write_string(s
, out
=out
, encoding
=self
.params
.get('encoding')) 
 507     def to_stdout(self
, message
, skip_eol
=False, check_quiet
=False): 
 508         """Print message to stdout if not in quiet mode.""" 
 509         if self
.params
.get('logger'): 
 510             self
.params
['logger'].debug(message
) 
 511         elif not check_quiet 
or not self
.params
.get('quiet', False): 
 512             message 
= self
._bidi
_workaround
(message
) 
 513             terminator 
= ['\n', ''][skip_eol
] 
 514             output 
= message 
+ terminator
 
 516             self
._write
_string
(output
, self
._screen
_file
) 
 518     def to_stderr(self
, message
): 
 519         """Print message to stderr.""" 
 520         assert isinstance(message
, compat_str
) 
 521         if self
.params
.get('logger'): 
 522             self
.params
['logger'].error(message
) 
 524             message 
= self
._bidi
_workaround
(message
) 
 525             output 
= message 
+ '\n' 
 526             self
._write
_string
(output
, self
._err
_file
) 
 528     def to_console_title(self
, message
): 
 529         if not self
.params
.get('consoletitle', False): 
 531         if compat_os_name 
== 'nt': 
 532             if ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 533                 # c_wchar_p() might not be necessary if `message` is 
 534                 # already of type unicode() 
 535                 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 536         elif 'TERM' in os
.environ
: 
 537             self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
) 
 539     def save_console_title(self
): 
 540         if not self
.params
.get('consoletitle', False): 
 542         if self
.params
.get('simulate', False): 
 544         if compat_os_name 
!= 'nt' and 'TERM' in os
.environ
: 
 545             # Save the title on stack 
 546             self
._write
_string
('\033[22;0t', self
._screen
_file
) 
 548     def restore_console_title(self
): 
 549         if not self
.params
.get('consoletitle', False): 
 551         if self
.params
.get('simulate', False): 
 553         if compat_os_name 
!= 'nt' and 'TERM' in os
.environ
: 
 554             # Restore the title from stack 
 555             self
._write
_string
('\033[23;0t', self
._screen
_file
) 
 558         self
.save_console_title() 
 561     def __exit__(self
, *args
): 
 562         self
.restore_console_title() 
 564         if self
.params
.get('cookiefile') is not None: 
 565             self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True) 
 567     def trouble(self
, message
=None, tb
=None): 
 568         """Determine action to take when a download problem appears. 
 570         Depending on if the downloader has been configured to ignore 
 571         download errors or not, this method may throw an exception or 
 572         not when errors are found, after printing the message. 
 574         tb, if given, is additional traceback information. 
 576         if message 
is not None: 
 577             self
.to_stderr(message
) 
 578         if self
.params
.get('verbose'): 
 580                 if sys
.exc_info()[0]:  # if .trouble has been called from an except block 
 582                     if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]: 
 583                         tb 
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
)) 
 584                     tb 
+= encode_compat_str(traceback
.format_exc()) 
 586                     tb_data 
= traceback
.format_list(traceback
.extract_stack()) 
 587                     tb 
= ''.join(tb_data
) 
 589         if not self
.params
.get('ignoreerrors', False): 
 590             if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]: 
 591                 exc_info 
= sys
.exc_info()[1].exc_info
 
 593                 exc_info 
= sys
.exc_info() 
 594             raise DownloadError(message
, exc_info
) 
 595         self
._download
_retcode 
= 1 
 597     def report_warning(self
, message
): 
 599         Print the message to stderr, it will be prefixed with 'WARNING:' 
 600         If stderr is a tty file the 'WARNING:' will be colored 
 602         if self
.params
.get('logger') is not None: 
 603             self
.params
['logger'].warning(message
) 
 605             if self
.params
.get('no_warnings'): 
 607             if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name 
!= 'nt': 
 608                 _msg_header 
= '\033[0;33mWARNING:\033[0m' 
 610                 _msg_header 
= 'WARNING:' 
 611             warning_message 
= '%s %s' % (_msg_header
, message
) 
 612             self
.to_stderr(warning_message
) 
 614     def report_error(self
, message
, tb
=None): 
 616         Do the same as trouble, but prefixes the message with 'ERROR:', colored 
 617         in red if stderr is a tty file. 
 619         if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name 
!= 'nt': 
 620             _msg_header 
= '\033[0;31mERROR:\033[0m' 
 622             _msg_header 
= 'ERROR:' 
 623         error_message 
= '%s %s' % (_msg_header
, message
) 
 624         self
.trouble(error_message
, tb
) 
 626     def report_file_already_downloaded(self
, file_name
): 
 627         """Report file has already been fully downloaded.""" 
 629             self
.to_screen('[download] %s has already been downloaded' % file_name
) 
 630         except UnicodeEncodeError: 
 631             self
.to_screen('[download] The file has already been downloaded') 
 633     def prepare_filename(self
, info_dict
): 
 634         """Generate the output filename.""" 
 636             template_dict 
= dict(info_dict
) 
 638             template_dict
['epoch'] = int(time
.time()) 
 639             autonumber_size 
= self
.params
.get('autonumber_size') 
 640             if autonumber_size 
is None: 
 642             template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
 
 643             if template_dict
.get('resolution') is None: 
 644                 if template_dict
.get('width') and template_dict
.get('height'): 
 645                     template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height']) 
 646                 elif template_dict
.get('height'): 
 647                     template_dict
['resolution'] = '%sp' % template_dict
['height'] 
 648                 elif template_dict
.get('width'): 
 649                     template_dict
['resolution'] = '%dx?' % template_dict
['width'] 
 651             sanitize 
= lambda k
, v
: sanitize_filename( 
 653                 restricted
=self
.params
.get('restrictfilenames'), 
 654                 is_id
=(k 
== 'id' or k
.endswith('_id'))) 
 655             template_dict 
= dict((k
, v 
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
)) 
 656                                  for k
, v 
in template_dict
.items() 
 657                                  if v 
is not None and not isinstance(v
, (list, tuple, dict))) 
 658             template_dict 
= collections
.defaultdict(lambda: 'NA', template_dict
) 
 660             outtmpl 
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) 
 662             # For fields playlist_index and autonumber convert all occurrences 
 663             # of %(field)s to %(field)0Nd for backward compatibility 
 664             field_size_compat_map 
= { 
 665                 'playlist_index': len(str(template_dict
['n_entries'])), 
 666                 'autonumber': autonumber_size
, 
 668             FIELD_SIZE_COMPAT_RE 
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s' 
 669             mobj 
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
) 
 672                     FIELD_SIZE_COMPAT_RE
, 
 673                     r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')], 
 676             # Missing numeric fields used together with integer presentation types 
 677             # in format specification will break the argument substitution since 
 678             # string 'NA' is returned for missing fields. We will patch output 
 679             # template for missing fields to meet string presentation type. 
 680             for numeric_field 
in self
._NUMERIC
_FIELDS
: 
 681                 if numeric_field 
not in template_dict
: 
 682                     # As of [1] format syntax is: 
 683                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type 
 684                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting 
 688                         \({0}\)  # mapping key 
 689                         (?:[#0\-+ ]+)?  # conversion flags (optional) 
 690                         (?:\d+)?  # minimum field width (optional) 
 691                         (?:\.\d+)?  # precision (optional) 
 692                         [hlL]?  # length modifier (optional) 
 693                         [diouxXeEfFgGcrs%]  # conversion type 
 696                         FORMAT_RE
.format(numeric_field
), 
 697                         r
'%({0})s'.format(numeric_field
), outtmpl
) 
 699             # expand_path translates '%%' into '%' and '$$' into '$' 
 700             # correspondingly that is not what we want since we need to keep 
 701             # '%%' intact for template dict substitution step. Working around 
 702             # with boundary-alike separator hack. 
 703             sep 
= ''.join([random
.choice(ascii_letters
) for _ 
in range(32)]) 
 704             outtmpl 
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
)) 
 706             # outtmpl should be expand_path'ed before template dict substitution 
 707             # because meta fields may contain env variables we don't want to 
 708             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and 
 709             # title "Hello $PATH", we don't want `$PATH` to be expanded. 
 710             filename 
= expand_path(outtmpl
).replace(sep
, '') % template_dict
 
 712             # Temporary fix for #4787 
 713             # 'Treat' all problem characters by passing filename through preferredencoding 
 714             # to workaround encoding issues with subprocess on python2 @ Windows 
 715             if sys
.version_info 
< (3, 0) and sys
.platform 
== 'win32': 
 716                 filename 
= encodeFilename(filename
, True).decode(preferredencoding()) 
 717             return sanitize_path(filename
) 
 718         except ValueError as err
: 
 719             self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')') 
 722     def _match_entry(self
, info_dict
, incomplete
): 
 723         """ Returns None iff the file should be downloaded """ 
 725         video_title 
= info_dict
.get('title', info_dict
.get('id', 'video')) 
 726         if 'title' in info_dict
: 
 727             # This can happen when we're just evaluating the playlist 
 728             title 
= info_dict
['title'] 
 729             matchtitle 
= self
.params
.get('matchtitle', False) 
 731                 if not re
.search(matchtitle
, title
, re
.IGNORECASE
): 
 732                     return '"' + title 
+ '" title did not match pattern "' + matchtitle 
+ '"' 
 733             rejecttitle 
= self
.params
.get('rejecttitle', False) 
 735                 if re
.search(rejecttitle
, title
, re
.IGNORECASE
): 
 736                     return '"' + title 
+ '" title matched reject pattern "' + rejecttitle 
+ '"' 
 737         date 
= info_dict
.get('upload_date') 
 739             dateRange 
= self
.params
.get('daterange', DateRange()) 
 740             if date 
not in dateRange
: 
 741                 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
) 
 742         view_count 
= info_dict
.get('view_count') 
 743         if view_count 
is not None: 
 744             min_views 
= self
.params
.get('min_views') 
 745             if min_views 
is not None and view_count 
< min_views
: 
 746                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
) 
 747             max_views 
= self
.params
.get('max_views') 
 748             if max_views 
is not None and view_count 
> max_views
: 
 749                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
) 
 750         if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')): 
 751             return 'Skipping "%s" because it is age restricted' % video_title
 
 752         if self
.in_download_archive(info_dict
): 
 753             return '%s has already been recorded in archive' % video_title
 
 756             match_filter 
= self
.params
.get('match_filter') 
 757             if match_filter 
is not None: 
 758                 ret 
= match_filter(info_dict
) 
 765     def add_extra_info(info_dict
, extra_info
): 
 766         '''Set the keys from extra_info in info dict if they are missing''' 
 767         for key
, value 
in extra_info
.items(): 
 768             info_dict
.setdefault(key
, value
) 
 770     def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={}, 
 771                      process
=True, force_generic_extractor
=False): 
 773         Returns a list with a dictionary for each video we find. 
 774         If 'download', also downloads the videos. 
 775         extra_info is a dict containing the extra values to add to each result 
 778         if not ie_key 
and force_generic_extractor
: 
 782             ies 
= [self
.get_info_extractor(ie_key
)] 
 787             if not ie
.suitable(url
): 
 790             ie 
= self
.get_info_extractor(ie
.ie_key()) 
 792                 self
.report_warning('The program functionality for this site has been marked as broken, ' 
 793                                     'and will probably not work.') 
 796                 ie_result 
= ie
.extract(url
) 
 797                 if ie_result 
is None:  # Finished already (backwards compatibility; listformats and friends should be moved here) 
 799                 if isinstance(ie_result
, list): 
 800                     # Backwards compatibility: old IE result format 
 802                         '_type': 'compat_list', 
 803                         'entries': ie_result
, 
 805                 self
.add_default_extra_info(ie_result
, ie
, url
) 
 807                     return self
.process_ie_result(ie_result
, download
, extra_info
) 
 810             except GeoRestrictedError 
as e
: 
 813                     msg 
+= '\nThis video is available in %s.' % ', '.join( 
 814                         map(ISO3166Utils
.short2full
, e
.countries
)) 
 815                 msg 
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' 
 816                 self
.report_error(msg
) 
 818             except ExtractorError 
as e
:  # An error we somewhat expected 
 819                 self
.report_error(compat_str(e
), e
.format_traceback()) 
 821             except MaxDownloadsReached
: 
 823             except Exception as e
: 
 824                 if self
.params
.get('ignoreerrors', False): 
 825                     self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc())) 
 830             self
.report_error('no suitable InfoExtractor for URL %s' % url
) 
 832     def add_default_extra_info(self
, ie_result
, ie
, url
): 
 833         self
.add_extra_info(ie_result
, { 
 834             'extractor': ie
.IE_NAME
, 
 836             'webpage_url_basename': url_basename(url
), 
 837             'extractor_key': ie
.ie_key(), 
 840     def process_ie_result(self
, ie_result
, download
=True, extra_info
={}): 
 842         Take the result of the ie(may be modified) and resolve all unresolved 
 843         references (URLs, playlist items). 
 845         It will also download the videos if 'download'. 
 846         Returns the resolved ie_result. 
 848         result_type 
= ie_result
.get('_type', 'video') 
 850         if result_type 
in ('url', 'url_transparent'): 
 851             ie_result
['url'] = sanitize_url(ie_result
['url']) 
 852             extract_flat 
= self
.params
.get('extract_flat', False) 
 853             if ((extract_flat 
== 'in_playlist' and 'playlist' in extra_info
) 
 854                     or extract_flat 
is True): 
 855                 self
.__forced
_printings
( 
 856                     ie_result
, self
.prepare_filename(ie_result
), 
 860         if result_type 
== 'video': 
 861             self
.add_extra_info(ie_result
, extra_info
) 
 862             return self
.process_video_result(ie_result
, download
=download
) 
 863         elif result_type 
== 'url': 
 864             # We have to add extra_info to the results because it may be 
 865             # contained in a playlist 
 866             return self
.extract_info(ie_result
['url'], 
 868                                      ie_key
=ie_result
.get('ie_key'), 
 869                                      extra_info
=extra_info
) 
 870         elif result_type 
== 'url_transparent': 
 871             # Use the information from the embedding page 
 872             info 
= self
.extract_info( 
 873                 ie_result
['url'], ie_key
=ie_result
.get('ie_key'), 
 874                 extra_info
=extra_info
, download
=False, process
=False) 
 876             # extract_info may return None when ignoreerrors is enabled and 
 877             # extraction failed with an error, don't crash and return early 
 882             force_properties 
= dict( 
 883                 (k
, v
) for k
, v 
in ie_result
.items() if v 
is not None) 
 884             for f 
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'): 
 885                 if f 
in force_properties
: 
 886                     del force_properties
[f
] 
 887             new_result 
= info
.copy() 
 888             new_result
.update(force_properties
) 
 890             # Extracted info may not be a video result (i.e. 
 891             # info.get('_type', 'video') != video) but rather an url or 
 892             # url_transparent. In such cases outer metadata (from ie_result) 
 893             # should be propagated to inner one (info). For this to happen 
 894             # _type of info should be overridden with url_transparent. This 
 895             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. 
 896             if new_result
.get('_type') == 'url': 
 897                 new_result
['_type'] = 'url_transparent' 
 899             return self
.process_ie_result( 
 900                 new_result
, download
=download
, extra_info
=extra_info
) 
 901         elif result_type 
in ('playlist', 'multi_video'): 
 902             # We process each entry in the playlist 
 903             playlist 
= ie_result
.get('title') or ie_result
.get('id') 
 904             self
.to_screen('[download] Downloading playlist: %s' % playlist
) 
 906             playlist_results 
= [] 
 908             playliststart 
= self
.params
.get('playliststart', 1) - 1 
 909             playlistend 
= self
.params
.get('playlistend') 
 910             # For backwards compatibility, interpret -1 as whole list 
 911             if playlistend 
== -1: 
 914             playlistitems_str 
= self
.params
.get('playlist_items') 
 916             if playlistitems_str 
is not None: 
 917                 def iter_playlistitems(format
): 
 918                     for string_segment 
in format
.split(','): 
 919                         if '-' in string_segment
: 
 920                             start
, end 
= string_segment
.split('-') 
 921                             for item 
in range(int(start
), int(end
) + 1): 
 924                             yield int(string_segment
) 
 925                 playlistitems 
= orderedSet(iter_playlistitems(playlistitems_str
)) 
 927             ie_entries 
= ie_result
['entries'] 
 929             def make_playlistitems_entries(list_ie_entries
): 
 930                 num_entries 
= len(list_ie_entries
) 
 932                     list_ie_entries
[i 
- 1] for i 
in playlistitems
 
 933                     if -num_entries 
<= i 
- 1 < num_entries
] 
 935             def report_download(num_entries
): 
 937                     '[%s] playlist %s: Downloading %d videos' % 
 938                     (ie_result
['extractor'], playlist
, num_entries
)) 
 940             if isinstance(ie_entries
, list): 
 941                 n_all_entries 
= len(ie_entries
) 
 943                     entries 
= make_playlistitems_entries(ie_entries
) 
 945                     entries 
= ie_entries
[playliststart
:playlistend
] 
 946                 n_entries 
= len(entries
) 
 948                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % 
 949                     (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
)) 
 950             elif isinstance(ie_entries
, PagedList
): 
 953                     for item 
in playlistitems
: 
 954                         entries
.extend(ie_entries
.getslice( 
 958                     entries 
= ie_entries
.getslice( 
 959                         playliststart
, playlistend
) 
 960                 n_entries 
= len(entries
) 
 961                 report_download(n_entries
) 
 964                     entries 
= make_playlistitems_entries(list(itertools
.islice( 
 965                         ie_entries
, 0, max(playlistitems
)))) 
 967                     entries 
= list(itertools
.islice( 
 968                         ie_entries
, playliststart
, playlistend
)) 
 969                 n_entries 
= len(entries
) 
 970                 report_download(n_entries
) 
 972             if self
.params
.get('playlistreverse', False): 
 973                 entries 
= entries
[::-1] 
 975             if self
.params
.get('playlistrandom', False): 
 976                 random
.shuffle(entries
) 
 978             x_forwarded_for 
= ie_result
.get('__x_forwarded_for_ip') 
 980             for i
, entry 
in enumerate(entries
, 1): 
 981                 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
)) 
 982                 # This __x_forwarded_for_ip thing is a bit ugly but requires 
 985                     entry
['__x_forwarded_for_ip'] = x_forwarded_for
 
 987                     'n_entries': n_entries
, 
 988                     'playlist': playlist
, 
 989                     'playlist_id': ie_result
.get('id'), 
 990                     'playlist_title': ie_result
.get('title'), 
 991                     'playlist_uploader': ie_result
.get('uploader'), 
 992                     'playlist_uploader_id': ie_result
.get('uploader_id'), 
 993                     'playlist_index': i 
+ playliststart
, 
 994                     'extractor': ie_result
['extractor'], 
 995                     'webpage_url': ie_result
['webpage_url'], 
 996                     'webpage_url_basename': url_basename(ie_result
['webpage_url']), 
 997                     'extractor_key': ie_result
['extractor_key'], 
1000                 reason 
= self
._match
_entry
(entry
, incomplete
=True) 
1001                 if reason 
is not None: 
1002                     self
.to_screen('[download] ' + reason
) 
1005                 entry_result 
= self
.process_ie_result(entry
, 
1008                 playlist_results
.append(entry_result
) 
1009             ie_result
['entries'] = playlist_results
 
1010             self
.to_screen('[download] Finished downloading playlist: %s' % playlist
) 
1012         elif result_type 
== 'compat_list': 
1013             self
.report_warning( 
1014                 'Extractor %s returned a compat_list result. ' 
1015                 'It needs to be updated.' % ie_result
.get('extractor')) 
1018                 self
.add_extra_info( 
1021                         'extractor': ie_result
['extractor'], 
1022                         'webpage_url': ie_result
['webpage_url'], 
1023                         'webpage_url_basename': url_basename(ie_result
['webpage_url']), 
1024                         'extractor_key': ie_result
['extractor_key'], 
1028             ie_result
['entries'] = [ 
1029                 self
.process_ie_result(_fixup(r
), download
, extra_info
) 
1030                 for r 
in ie_result
['entries'] 
1034             raise Exception('Invalid result type: %s' % result_type
) 
1036     def _build_format_filter(self
, filter_spec
): 
1037         " Returns a function to filter the formats according to the filter_spec " 
1047         operator_rex 
= re
.compile(r
'''(?x)\s* 
1048             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps) 
1049             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* 
1050             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) 
1052             ''' % '|'.join(map(re
.escape
, OPERATORS
.keys()))) 
1053         m 
= operator_rex
.search(filter_spec
) 
1056                 comparison_value 
= int(m
.group('value')) 
1058                 comparison_value 
= parse_filesize(m
.group('value')) 
1059                 if comparison_value 
is None: 
1060                     comparison_value 
= parse_filesize(m
.group('value') + 'B') 
1061                 if comparison_value 
is None: 
1063                         'Invalid value %r in format specification %r' % ( 
1064                             m
.group('value'), filter_spec
)) 
1065             op 
= OPERATORS
[m
.group('op')] 
1070                 '^=': lambda attr
, value
: attr
.startswith(value
), 
1071                 '$=': lambda attr
, value
: attr
.endswith(value
), 
1072                 '*=': lambda attr
, value
: value 
in attr
, 
1074             str_operator_rex 
= re
.compile(r
'''(?x) 
1075                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id) 
1076                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)? 
1077                 \s*(?P<value>[a-zA-Z0-9._-]+) 
1079                 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys()))) 
1080             m 
= str_operator_rex
.search(filter_spec
) 
1082                 comparison_value 
= m
.group('value') 
1083                 str_op 
= STR_OPERATORS
[m
.group('op')] 
1084                 if m
.group('negation'): 
1085                     op 
= lambda attr
, value
: not str_op(attr
, value
) 
1090             raise ValueError('Invalid filter specification %r' % filter_spec
) 
1093             actual_value 
= f
.get(m
.group('key')) 
1094             if actual_value 
is None: 
1095                 return m
.group('none_inclusive') 
1096             return op(actual_value
, comparison_value
) 
1099     def _default_format_spec(self
, info_dict
, download
=True): 
1102             merger 
= FFmpegMergerPP(self
) 
1103             return merger
.available 
and merger
.can_merge() 
1106             if self
.params
.get('simulate', False): 
1110             if self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) == '-': 
1112             if info_dict
.get('is_live'): 
1118         req_format_list 
= ['bestvideo+bestaudio', 'best'] 
1120             req_format_list
.reverse() 
1121         return '/'.join(req_format_list
) 
1123     def build_format_selector(self
, format_spec
): 
1124         def syntax_error(note
, start
): 
1126                 'Invalid format specification: ' 
1127                 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1])) 
1128             return SyntaxError(message
) 
1130         PICKFIRST 
= 'PICKFIRST' 
1134         FormatSelector 
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters']) 
1136         def _parse_filter(tokens
): 
1138             for type, string
, start
, _
, _ 
in tokens
: 
1139                 if type == tokenize
.OP 
and string 
== ']': 
1140                     return ''.join(filter_parts
) 
1142                     filter_parts
.append(string
) 
1144         def _remove_unused_ops(tokens
): 
1145             # Remove operators that we don't use and join them with the surrounding strings 
1146             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' 
1147             ALLOWED_OPS 
= ('/', '+', ',', '(', ')') 
1148             last_string
, last_start
, last_end
, last_line 
= None, None, None, None 
1149             for type, string
, start
, end
, line 
in tokens
: 
1150                 if type == tokenize
.OP 
and string 
== '[': 
1152                         yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
 
1154                     yield type, string
, start
, end
, line
 
1155                     # everything inside brackets will be handled by _parse_filter 
1156                     for type, string
, start
, end
, line 
in tokens
: 
1157                         yield type, string
, start
, end
, line
 
1158                         if type == tokenize
.OP 
and string 
== ']': 
1160                 elif type == tokenize
.OP 
and string 
in ALLOWED_OPS
: 
1162                         yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
 
1164                     yield type, string
, start
, end
, line
 
1165                 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]: 
1167                         last_string 
= string
 
1171                         last_string 
+= string
 
1173                 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
 
1175         def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False): 
1177             current_selector 
= None 
1178             for type, string
, start
, _
, _ 
in tokens
: 
1179                 # ENCODING is only defined in python 3.x 
1180                 if type == getattr(tokenize
, 'ENCODING', None): 
1182                 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]: 
1183                     current_selector 
= FormatSelector(SINGLE
, string
, []) 
1184                 elif type == tokenize
.OP
: 
1186                         if not inside_group
: 
1187                             # ')' will be handled by the parentheses group 
1188                             tokens
.restore_last_token() 
1190                     elif inside_merge 
and string 
in ['/', ',']: 
1191                         tokens
.restore_last_token() 
1193                     elif inside_choice 
and string 
== ',': 
1194                         tokens
.restore_last_token() 
1197                         if not current_selector
: 
1198                             raise syntax_error('"," must follow a format selector', start
) 
1199                         selectors
.append(current_selector
) 
1200                         current_selector 
= None 
1202                         if not current_selector
: 
1203                             raise syntax_error('"/" must follow a format selector', start
) 
1204                         first_choice 
= current_selector
 
1205                         second_choice 
= _parse_format_selection(tokens
, inside_choice
=True) 
1206                         current_selector 
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), []) 
1208                         if not current_selector
: 
1209                             current_selector 
= FormatSelector(SINGLE
, 'best', []) 
1210                         format_filter 
= _parse_filter(tokens
) 
1211                         current_selector
.filters
.append(format_filter
) 
1213                         if current_selector
: 
1214                             raise syntax_error('Unexpected "("', start
) 
1215                         group 
= _parse_format_selection(tokens
, inside_group
=True) 
1216                         current_selector 
= FormatSelector(GROUP
, group
, []) 
1218                         video_selector 
= current_selector
 
1219                         audio_selector 
= _parse_format_selection(tokens
, inside_merge
=True) 
1220                         if not video_selector 
or not audio_selector
: 
1221                             raise syntax_error('"+" must be between two format selectors', start
) 
1222                         current_selector 
= FormatSelector(MERGE
, (video_selector
, audio_selector
), []) 
1224                         raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
) 
1225                 elif type == tokenize
.ENDMARKER
: 
1227             if current_selector
: 
1228                 selectors
.append(current_selector
) 
1231         def _build_selector_function(selector
): 
1232             if isinstance(selector
, list): 
1233                 fs 
= [_build_selector_function(s
) for s 
in selector
] 
1235                 def selector_function(ctx
): 
1237                         for format 
in f(ctx
): 
1239                 return selector_function
 
1240             elif selector
.type == GROUP
: 
1241                 selector_function 
= _build_selector_function(selector
.selector
) 
1242             elif selector
.type == PICKFIRST
: 
1243                 fs 
= [_build_selector_function(s
) for s 
in selector
.selector
] 
1245                 def selector_function(ctx
): 
1247                         picked_formats 
= list(f(ctx
)) 
1249                             return picked_formats
 
1251             elif selector
.type == SINGLE
: 
1252                 format_spec 
= selector
.selector
 
1254                 def selector_function(ctx
): 
1255                     formats 
= list(ctx
['formats']) 
1258                     if format_spec 
== 'all': 
1261                     elif format_spec 
in ['best', 'worst', None]: 
1262                         format_idx 
= 0 if format_spec 
== 'worst' else -1 
1263                         audiovideo_formats 
= [ 
1265                             if f
.get('vcodec') != 'none' and f
.get('acodec') != 'none'] 
1266                         if audiovideo_formats
: 
1267                             yield audiovideo_formats
[format_idx
] 
1268                         # for extractors with incomplete formats (audio only (soundcloud) 
1269                         # or video only (imgur)) we will fallback to best/worst 
1270                         # {video,audio}-only format 
1271                         elif ctx
['incomplete_formats']: 
1272                             yield formats
[format_idx
] 
1273                     elif format_spec 
== 'bestaudio': 
1276                             if f
.get('vcodec') == 'none'] 
1278                             yield audio_formats
[-1] 
1279                     elif format_spec 
== 'worstaudio': 
1282                             if f
.get('vcodec') == 'none'] 
1284                             yield audio_formats
[0] 
1285                     elif format_spec 
== 'bestvideo': 
1288                             if f
.get('acodec') == 'none'] 
1290                             yield video_formats
[-1] 
1291                     elif format_spec 
== 'worstvideo': 
1294                             if f
.get('acodec') == 'none'] 
1296                             yield video_formats
[0] 
1298                         extensions 
= ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] 
1299                         if format_spec 
in extensions
: 
1300                             filter_f 
= lambda f
: f
['ext'] == format_spec
 
1302                             filter_f 
= lambda f
: f
['format_id'] == format_spec
 
1303                         matches 
= list(filter(filter_f
, formats
)) 
1306             elif selector
.type == MERGE
: 
1307                 def _merge(formats_info
): 
1308                     format_1
, format_2 
= [f
['format_id'] for f 
in formats_info
] 
1309                     # The first format must contain the video and the 
1311                     if formats_info
[0].get('vcodec') == 'none': 
1312                         self
.report_error('The first format must ' 
1313                                           'contain the video, try using ' 
1314                                           '"-f %s+%s"' % (format_2
, format_1
)) 
1316                     # Formats must be opposite (video+audio) 
1317                     if formats_info
[0].get('acodec') == 'none' and formats_info
[1].get('acodec') == 'none': 
1319                             'Both formats %s and %s are video-only, you must specify "-f video+audio"' 
1320                             % (format_1
, format_2
)) 
1323                         formats_info
[0]['ext'] 
1324                         if self
.params
.get('merge_output_format') is None 
1325                         else self
.params
['merge_output_format']) 
1327                         'requested_formats': formats_info
, 
1328                         'format': '%s+%s' % (formats_info
[0].get('format'), 
1329                                              formats_info
[1].get('format')), 
1330                         'format_id': '%s+%s' % (formats_info
[0].get('format_id'), 
1331                                                 formats_info
[1].get('format_id')), 
1332                         'width': formats_info
[0].get('width'), 
1333                         'height': formats_info
[0].get('height'), 
1334                         'resolution': formats_info
[0].get('resolution'), 
1335                         'fps': formats_info
[0].get('fps'), 
1336                         'vcodec': formats_info
[0].get('vcodec'), 
1337                         'vbr': formats_info
[0].get('vbr'), 
1338                         'stretched_ratio': formats_info
[0].get('stretched_ratio'), 
1339                         'acodec': formats_info
[1].get('acodec'), 
1340                         'abr': formats_info
[1].get('abr'), 
1343                 video_selector
, audio_selector 
= map(_build_selector_function
, selector
.selector
) 
1345                 def selector_function(ctx
): 
1346                     for pair 
in itertools
.product( 
1347                             video_selector(copy
.deepcopy(ctx
)), audio_selector(copy
.deepcopy(ctx
))): 
1350             filters 
= [self
._build
_format
_filter
(f
) for f 
in selector
.filters
] 
1352             def final_selector(ctx
): 
1353                 ctx_copy 
= copy
.deepcopy(ctx
) 
1354                 for _filter 
in filters
: 
1355                     ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats'])) 
1356                 return selector_function(ctx_copy
) 
1357             return final_selector
 
1359         stream 
= io
.BytesIO(format_spec
.encode('utf-8')) 
1361             tokens 
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
))) 
1362         except tokenize
.TokenError
: 
1363             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
))) 
1365         class TokenIterator(object): 
1366             def __init__(self
, tokens
): 
1367                 self
.tokens 
= tokens
 
1374                 if self
.counter 
>= len(self
.tokens
): 
1375                     raise StopIteration() 
1376                 value 
= self
.tokens
[self
.counter
] 
1382             def restore_last_token(self
): 
1385         parsed_selector 
= _parse_format_selection(iter(TokenIterator(tokens
))) 
1386         return _build_selector_function(parsed_selector
) 
1388     def _calc_headers(self
, info_dict
): 
1389         res 
= std_headers
.copy() 
1391         add_headers 
= info_dict
.get('http_headers') 
1393             res
.update(add_headers
) 
1395         cookies 
= self
._calc
_cookies
(info_dict
) 
1397             res
['Cookie'] = cookies
 
1399         if 'X-Forwarded-For' not in res
: 
1400             x_forwarded_for_ip 
= info_dict
.get('__x_forwarded_for_ip') 
1401             if x_forwarded_for_ip
: 
1402                 res
['X-Forwarded-For'] = x_forwarded_for_ip
 
1406     def _calc_cookies(self
, info_dict
): 
1407         pr 
= sanitized_Request(info_dict
['url']) 
1408         self
.cookiejar
.add_cookie_header(pr
) 
1409         return pr
.get_header('Cookie') 
1411     def process_video_result(self
, info_dict
, download
=True): 
1412         assert info_dict
.get('_type', 'video') == 'video' 
1414         if 'id' not in info_dict
: 
1415             raise ExtractorError('Missing "id" field in extractor result') 
1416         if 'title' not in info_dict
: 
1417             raise ExtractorError('Missing "title" field in extractor result') 
1419         def report_force_conversion(field
, field_not
, conversion
): 
1420             self
.report_warning( 
1421                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor' 
1422                 % (field
, field_not
, conversion
)) 
1424         def sanitize_string_field(info
, string_field
): 
1425             field 
= info
.get(string_field
) 
1426             if field 
is None or isinstance(field
, compat_str
): 
1428             report_force_conversion(string_field
, 'a string', 'string') 
1429             info
[string_field
] = compat_str(field
) 
1431         def sanitize_numeric_fields(info
): 
1432             for numeric_field 
in self
._NUMERIC
_FIELDS
: 
1433                 field 
= info
.get(numeric_field
) 
1434                 if field 
is None or isinstance(field
, compat_numeric_types
): 
1436                 report_force_conversion(numeric_field
, 'numeric', 'int') 
1437                 info
[numeric_field
] = int_or_none(field
) 
1439         sanitize_string_field(info_dict
, 'id') 
1440         sanitize_numeric_fields(info_dict
) 
1442         if 'playlist' not in info_dict
: 
1443             # It isn't part of a playlist 
1444             info_dict
['playlist'] = None 
1445             info_dict
['playlist_index'] = None 
1447         thumbnails 
= info_dict
.get('thumbnails') 
1448         if thumbnails 
is None: 
1449             thumbnail 
= info_dict
.get('thumbnail') 
1451                 info_dict
['thumbnails'] = thumbnails 
= [{'url': thumbnail
}] 
1453             thumbnails
.sort(key
=lambda t
: ( 
1454                 t
.get('preference') if t
.get('preference') is not None else -1, 
1455                 t
.get('width') if t
.get('width') is not None else -1, 
1456                 t
.get('height') if t
.get('height') is not None else -1, 
1457                 t
.get('id') if t
.get('id') is not None else '', t
.get('url'))) 
1458             for i
, t 
in enumerate(thumbnails
): 
1459                 t
['url'] = sanitize_url(t
['url']) 
1460                 if t
.get('width') and t
.get('height'): 
1461                     t
['resolution'] = '%dx%d' % (t
['width'], t
['height']) 
1462                 if t
.get('id') is None: 
1465         if self
.params
.get('list_thumbnails'): 
1466             self
.list_thumbnails(info_dict
) 
1469         thumbnail 
= info_dict
.get('thumbnail') 
1471             info_dict
['thumbnail'] = sanitize_url(thumbnail
) 
1473             info_dict
['thumbnail'] = thumbnails
[-1]['url'] 
1475         if 'display_id' not in info_dict 
and 'id' in info_dict
: 
1476             info_dict
['display_id'] = info_dict
['id'] 
1478         if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None: 
1479             # Working around out-of-range timestamp values (e.g. negative ones on Windows, 
1480             # see http://bugs.python.org/issue1646728) 
1482                 upload_date 
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp']) 
1483                 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d') 
1484             except (ValueError, OverflowError, OSError): 
1487         # Auto generate title fields corresponding to the *_number fields when missing 
1488         # in order to always have clean titles. This is very common for TV series. 
1489         for field 
in ('chapter', 'season', 'episode'): 
1490             if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
): 
1491                 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
]) 
1493         for cc_kind 
in ('subtitles', 'automatic_captions'): 
1494             cc 
= info_dict
.get(cc_kind
) 
1496                 for _
, subtitle 
in cc
.items(): 
1497                     for subtitle_format 
in subtitle
: 
1498                         if subtitle_format
.get('url'): 
1499                             subtitle_format
['url'] = sanitize_url(subtitle_format
['url']) 
1500                         if subtitle_format
.get('ext') is None: 
1501                             subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower() 
1503         automatic_captions 
= info_dict
.get('automatic_captions') 
1504         subtitles 
= info_dict
.get('subtitles') 
1506         if self
.params
.get('listsubtitles', False): 
1507             if 'automatic_captions' in info_dict
: 
1508                 self
.list_subtitles( 
1509                     info_dict
['id'], automatic_captions
, 'automatic captions') 
1510             self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles') 
1513         info_dict
['requested_subtitles'] = self
.process_subtitles( 
1514             info_dict
['id'], subtitles
, automatic_captions
) 
1516         # We now pick which formats have to be downloaded 
1517         if info_dict
.get('formats') is None: 
1518             # There's only one format available 
1519             formats 
= [info_dict
] 
1521             formats 
= info_dict
['formats'] 
1524             raise ExtractorError('No video formats found!') 
1526         def is_wellformed(f
): 
1529                 self
.report_warning( 
1530                     '"url" field is missing or empty - skipping format, ' 
1531                     'there is an error in extractor') 
1533             if isinstance(url
, bytes): 
1534                 sanitize_string_field(f
, 'url') 
1537         # Filter out malformed formats for better extraction robustness 
1538         formats 
= list(filter(is_wellformed
, formats
)) 
1542         # We check that all the formats have the format and format_id fields 
1543         for i
, format 
in enumerate(formats
): 
1544             sanitize_string_field(format
, 'format_id') 
1545             sanitize_numeric_fields(format
) 
1546             format
['url'] = sanitize_url(format
['url']) 
1547             if not format
.get('format_id'): 
1548                 format
['format_id'] = compat_str(i
) 
1550                 # Sanitize format_id from characters used in format selector expression 
1551                 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id']) 
1552             format_id 
= format
['format_id'] 
1553             if format_id 
not in formats_dict
: 
1554                 formats_dict
[format_id
] = [] 
1555             formats_dict
[format_id
].append(format
) 
1557         # Make sure all formats have unique format_id 
1558         for format_id
, ambiguous_formats 
in formats_dict
.items(): 
1559             if len(ambiguous_formats
) > 1: 
1560                 for i
, format 
in enumerate(ambiguous_formats
): 
1561                     format
['format_id'] = '%s-%d' % (format_id
, i
) 
1563         for i
, format 
in enumerate(formats
): 
1564             if format
.get('format') is None: 
1565                 format
['format'] = '{id} - {res}{note}'.format( 
1566                     id=format
['format_id'], 
1567                     res
=self
.format_resolution(format
), 
1568                     note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '', 
1570             # Automatically determine file extension if missing 
1571             if format
.get('ext') is None: 
1572                 format
['ext'] = determine_ext(format
['url']).lower() 
1573             # Automatically determine protocol if missing (useful for format 
1574             # selection purposes) 
1575             if format
.get('protocol') is None: 
1576                 format
['protocol'] = determine_protocol(format
) 
1577             # Add HTTP headers, so that external programs can use them from the 
1579             full_format_info 
= info_dict
.copy() 
1580             full_format_info
.update(format
) 
1581             format
['http_headers'] = self
._calc
_headers
(full_format_info
) 
1582         # Remove private housekeeping stuff 
1583         if '__x_forwarded_for_ip' in info_dict
: 
1584             del info_dict
['__x_forwarded_for_ip'] 
1586         # TODO Central sorting goes here 
1588         if formats
[0] is not info_dict
: 
1589             # only set the 'formats' fields if the original info_dict list them 
1590             # otherwise we end up with a circular reference, the first (and unique) 
1591             # element in the 'formats' field in info_dict is info_dict itself, 
1592             # which can't be exported to json 
1593             info_dict
['formats'] = formats
 
1594         if self
.params
.get('listformats'): 
1595             self
.list_formats(info_dict
) 
1598         req_format 
= self
.params
.get('format') 
1599         if req_format 
is None: 
1600             req_format 
= self
._default
_format
_spec
(info_dict
, download
=download
) 
1601             if self
.params
.get('verbose'): 
1602                 self
.to_stdout('[debug] Default format spec: %s' % req_format
) 
1604         format_selector 
= self
.build_format_selector(req_format
) 
1606         # While in format selection we may need to have an access to the original 
1607         # format set in order to calculate some metrics or do some processing. 
1608         # For now we need to be able to guess whether original formats provided 
1609         # by extractor are incomplete or not (i.e. whether extractor provides only 
1610         # video-only or audio-only formats) for proper formats selection for 
1611         # extractors with such incomplete formats (see 
1612         # https://github.com/ytdl-org/youtube-dl/pull/5556). 
1613         # Since formats may be filtered during format selection and may not match 
1614         # the original formats the results may be incorrect. Thus original formats 
1615         # or pre-calculated metrics should be passed to format selection routines 
1617         # We will pass a context object containing all necessary additional data 
1618         # instead of just formats. 
1619         # This fixes incorrect format selection issue (see 
1620         # https://github.com/ytdl-org/youtube-dl/issues/10083). 
1621         incomplete_formats 
= ( 
1622             # All formats are video-only or 
1623             all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f 
in formats
) 
1624             # all formats are audio-only 
1625             or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f 
in formats
)) 
1629             'incomplete_formats': incomplete_formats
, 
1632         formats_to_download 
= list(format_selector(ctx
)) 
1633         if not formats_to_download
: 
1634             raise ExtractorError('requested format not available', 
1638             if len(formats_to_download
) > 1: 
1639                 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
))) 
1640             for format 
in formats_to_download
: 
1641                 new_info 
= dict(info_dict
) 
1642                 new_info
.update(format
) 
1643                 self
.process_info(new_info
) 
1644         # We update the info dict with the best quality format (backwards compatibility) 
1645         info_dict
.update(formats_to_download
[-1]) 
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Merges normal subtitles and automatic captions (normal ones win on
    conflicting languages), picks the requested languages, then for each
    language picks one format according to the 'subtitlesformat'
    preference list ('best' or a '/'-separated list of extensions).

    Returns a dict mapping language -> single subtitle-info dict, or
    None when subtitles were not requested or none are available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Automatic captions never override real subtitles
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not
            available_subs):
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    else:
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            # Fall back to an arbitrary available language
            requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                # Extractors list formats worst-first, so take the last
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched: take the best and warn
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
def __forced_printings(self, info_dict, filename, incomplete):
    """Print fields forced via --get-* / --print-json style options.

    Mandatory fields (title, id, format) are printed even when missing
    unless *incomplete* is set; optional fields only when present.
    """
    def print_mandatory(field):
        if (self.params.get('force%s' % field, False)
                and (not incomplete or info_dict.get(field) is not None)):
            self.to_stdout(info_dict[field])

    def print_optional(field):
        if (self.params.get('force%s' % field, False)
                and info_dict.get(field) is not None):
            self.to_stdout(info_dict[field])

    print_mandatory('title')
    print_mandatory('id')
    if self.params.get('forceurl', False) and not incomplete:
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    print_optional('thumbnail')
    print_optional('description')
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies match filters, writes requested side files (description,
    annotations, subtitles, info JSON, thumbnails), downloads the media
    (single format or multiple formats to be merged), and schedules
    fixup postprocessors. Side effects only; returns None.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    self.__forced_printings(info_dict, filename, incomplete=False)

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    def ensure_dir_exists(path):
        # Create the parent directory for *path*; report and signal failure
        try:
            dn = os.path.dirname(path)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            return True
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return False

    if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        sub_data = ie._request_webpage(
                            sub_info['url'], info_dict['id'], note=False).read()
                        with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                            subfile.write(sub_data)
                    except (ExtractorError, IOError, OSError, ValueError) as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Run the suitable FileDownloader with our progress hooks attached
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # True when the (video, audio) pair can share one container
                    video, audio = formats
                    # Check extension
                    video_ext, audio_ext = video.get('ext'), audio.get('ext')
                    if video_ext and audio_ext:
                        COMPATIBLE_EXTS = (
                            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                            ('webm')
                        )
                        for exts in COMPATIBLE_EXTS:
                            if video_ext in exts and audio_ext in exts:
                                return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = prepend_extension(
                            self.prepare_filename(new_info),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('protocol') == 'm3u8_native'
                    or info_dict.get('protocol') == 'm3u8'
                    and self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Refuses a fixed output template for multiple URLs (SameFileError),
    extracts and downloads each URL, and keeps going past unavailable
    videos; MaxDownloadsReached is re-raised to stop the whole run.
    Returns the accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        # A non-templated filename would be overwritten for every URL
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using a previously written --write-info-json file.

    Loads the info dict from *info_filename* and processes it; if that
    fails with DownloadError, retries from the recorded webpage_url
    (when present), otherwise re-raises. Returns the download retcode.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of *info_dict* without the transient
    'requested_formats'/'requested_subtitles' keys (they are not
    JSON-serializable housekeeping data).

    NOTE(review): called as self.filter_requested_info elsewhere, so the
    original presumably carries a @staticmethod decorator on the
    (dropped) preceding line — confirm against the full file.
    """
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        # Per-download postprocessors (e.g. merger, fixups) run first
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        files_to_delete = []
        try:
            files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in files_to_delete:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                try:
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
    """Build the '<extractor> <id>' key used in the download archive.

    Returns None when the video id or extractor cannot be determined.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        for ie in self._ies:
            if ie.suitable(url):
                extractor = ie.ie_key()
                break
        else:
            return
    return extractor.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
    """Return True if *info_dict* is already recorded in the download
    archive file ('download_archive' param); False otherwise, including
    when no archive is configured, the id is incomplete, or the archive
    file does not exist yet."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if not vid_id:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append this video's archive id to the download archive file.

    No-op when no 'download_archive' is configured.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict:
    'audio only', an explicit 'resolution' value, 'WxH', 'Hp', 'Wx?',
    or *default* when nothing is known.

    NOTE(review): referenced as self.format_resolution elsewhere, so the
    original presumably carries a @staticmethod decorator on the
    (dropped) preceding line — confirm against the full file.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '%dx?' % format['width']
    else:
        res = default
    return res
def _format_note(self, fdict):
    """Build the free-form note column shown by --list-formats:
    language, format note, bitrates, codecs, fps, sample rate and
    (approximate) filesize, comma-separated as applicable."""
    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        if res:
            res += ' '
        res += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        if res:
            res += ', '
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None
            and fdict.get('vcodec') != 'none'):
        if res:
            res += ', '
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
            # '@' joins the codec with the bitrate appended below
            res += '@'
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        res += 'video@'
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        if res:
            res += ', '
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if res:
            res += ', '
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        if res:
            res += ', '
        res += 'audio'
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        if res:
            res += ', '
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        if res:
            res += ', '
        res += '~' + format_bytes(fdict['filesize_approx'])
    return res
2196     def list_formats(self
, info_dict
): 
2197         formats 
= info_dict
.get('formats', [info_dict
]) 
2199             [f
['format_id'], f
['ext'], self
.format_resolution(f
), self
._format
_note
(f
)] 
2201             if f
.get('preference') is None or f
['preference'] >= -1000] 
2202         if len(formats
) > 1: 
2203             table
[-1][-1] += (' ' if table
[-1][-1] else '') + '(best)' 
2205         header_line 
= ['format code', 'extension', 'resolution', 'note'] 
2207             '[info] Available formats for %s:\n%s' % 
2208             (info_dict
['id'], render_table(header_line
, table
))) 
2210     def list_thumbnails(self
, info_dict
): 
2211         thumbnails 
= info_dict
.get('thumbnails') 
2213             self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id']) 
2217             '[info] Thumbnails for %s:' % info_dict
['id']) 
2218         self
.to_screen(render_table( 
2219             ['ID', 'width', 'height', 'URL'], 
2220             [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t 
in thumbnails
])) 
2222     def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'): 
2224             self
.to_screen('%s has no %s' % (video_id
, name
)) 
2227             'Available %s for %s:' % (name
, video_id
)) 
2228         self
.to_screen(render_table( 
2229             ['Language', 'formats'], 
2230             [[lang
, ', '.join(f
['ext'] for f 
in reversed(formats
))] 
2231                 for lang
, formats 
in subtitles
.items()])) 
    def urlopen(self, req):
        """ Start an HTTP download """
        # Accept either a Request object or a bare URL string; strings are
        # converted to a Request via the project's sanitized_Request helper
        # (defined elsewhere in this package).
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        # All requests go through the opener built by _setup_opener, using
        # the configured socket timeout.
        return self._opener.open(req, timeout=self._socket_timeout)
2239     def print_debug_header(self
): 
2240         if not self
.params
.get('verbose'): 
2243         if type('') is not compat_str
: 
2244             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326) 
2245             self
.report_warning( 
2246                 'Your Python is broken! Update to a newer and supported version') 
2248         stdout_encoding 
= getattr( 
2249             sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__) 
2251             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( 
2252                 locale
.getpreferredencoding(), 
2253                 sys
.getfilesystemencoding(), 
2255                 self
.get_encoding())) 
2256         write_string(encoding_str
, encoding
=None) 
2258         self
._write
_string
('[debug] youtube-dl version ' + __version__ 
+ '\n') 
2260             self
._write
_string
('[debug] Lazy loading extractors enabled' + '\n') 
2262             sp 
= subprocess
.Popen( 
2263                 ['git', 'rev-parse', '--short', 'HEAD'], 
2264                 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, 
2265                 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
))) 
2266             out
, err 
= sp
.communicate() 
2267             out 
= out
.decode().strip() 
2268             if re
.match('[0-9a-f]+', out
): 
2269                 self
._write
_string
('[debug] Git HEAD: ' + out 
+ '\n') 
2276         def python_implementation(): 
2277             impl_name 
= platform
.python_implementation() 
2278             if impl_name 
== 'PyPy' and hasattr(sys
, 'pypy_version_info'): 
2279                 return impl_name 
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3] 
2282         self
._write
_string
('[debug] Python version %s (%s) - %s\n' % ( 
2283             platform
.python_version(), python_implementation(), 
2286         exe_versions 
= FFmpegPostProcessor
.get_versions(self
) 
2287         exe_versions
['rtmpdump'] = rtmpdump_version() 
2288         exe_versions
['phantomjs'] = PhantomJSwrapper
._version
() 
2289         exe_str 
= ', '.join( 
2291             for exe
, v 
in sorted(exe_versions
.items()) 
2296         self
._write
_string
('[debug] exe versions: %s\n' % exe_str
) 
2299         for handler 
in self
._opener
.handlers
: 
2300             if hasattr(handler
, 'proxies'): 
2301                 proxy_map
.update(handler
.proxies
) 
2302         self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n') 
2304         if self
.params
.get('call_home', False): 
2305             ipaddr 
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8') 
2306             self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
) 
2307             latest_version 
= self
.urlopen( 
2308                 'https://yt-dl.org/latest/version').read().decode('utf-8') 
2309             if version_tuple(latest_version
) > version_tuple(__version__
): 
2310                 self
.report_warning( 
2311                     'You are using an outdated version (newest version: %s)! ' 
2312                     'See https://yt-dl.org/update if you need help updating.' % 
2315     def _setup_opener(self
): 
2316         timeout_val 
= self
.params
.get('socket_timeout') 
2317         self
._socket
_timeout 
= 600 if timeout_val 
is None else float(timeout_val
) 
2319         opts_cookiefile 
= self
.params
.get('cookiefile') 
2320         opts_proxy 
= self
.params
.get('proxy') 
2322         if opts_cookiefile 
is None: 
2323             self
.cookiejar 
= compat_cookiejar
.CookieJar() 
2325             opts_cookiefile 
= expand_path(opts_cookiefile
) 
2326             self
.cookiejar 
= YoutubeDLCookieJar(opts_cookiefile
) 
2327             if os
.access(opts_cookiefile
, os
.R_OK
): 
2328                 self
.cookiejar
.load(ignore_discard
=True, ignore_expires
=True) 
2330         cookie_processor 
= YoutubeDLCookieProcessor(self
.cookiejar
) 
2331         if opts_proxy 
is not None: 
2332             if opts_proxy 
== '': 
2335                 proxies 
= {'http': opts_proxy
, 'https': opts_proxy
} 
2337             proxies 
= compat_urllib_request
.getproxies() 
2338             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) 
2339             if 'http' in proxies 
and 'https' not in proxies
: 
2340                 proxies
['https'] = proxies
['http'] 
2341         proxy_handler 
= PerRequestProxyHandler(proxies
) 
2343         debuglevel 
= 1 if self
.params
.get('debug_printtraffic') else 0 
2344         https_handler 
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
) 
2345         ydlh 
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
) 
2346         data_handler 
= compat_urllib_request_DataHandler() 
2348         # When passing our own FileHandler instance, build_opener won't add the 
2349         # default FileHandler and allows us to disable the file protocol, which 
2350         # can be used for malicious purposes (see 
2351         # https://github.com/ytdl-org/youtube-dl/issues/8227) 
2352         file_handler 
= compat_urllib_request
.FileHandler() 
2354         def file_open(*args
, **kwargs
): 
2355             raise compat_urllib_error
.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') 
2356         file_handler
.file_open 
= file_open
 
2358         opener 
= compat_urllib_request
.build_opener( 
2359             proxy_handler
, https_handler
, cookie_processor
, ydlh
, data_handler
, file_handler
) 
2361         # Delete the default user-agent header, which would otherwise apply in 
2362         # cases where our custom HTTP handler doesn't come into play 
2363         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) 
2364         opener
.addheaders 
= [] 
2365         self
._opener 
= opener
 
2367     def encode(self
, s
): 
2368         if isinstance(s
, bytes): 
2369             return s  
# Already encoded 
2372             return s
.encode(self
.get_encoding()) 
2373         except UnicodeEncodeError as err
: 
2374             err
.reason 
= err
.reason 
+ '. Check your system encoding configuration or use the --encoding option.' 
2377     def get_encoding(self
): 
2378         encoding 
= self
.params
.get('encoding') 
2379         if encoding 
is None: 
2380             encoding 
= preferredencoding() 
2383     def _write_thumbnails(self
, info_dict
, filename
): 
2384         if self
.params
.get('writethumbnail', False): 
2385             thumbnails 
= info_dict
.get('thumbnails') 
2387                 thumbnails 
= [thumbnails
[-1]] 
2388         elif self
.params
.get('write_all_thumbnails', False): 
2389             thumbnails 
= info_dict
.get('thumbnails') 
2394             # No thumbnails present, so return immediately 
2397         for t 
in thumbnails
: 
2398             thumb_ext 
= determine_ext(t
['url'], 'jpg') 
2399             suffix 
= '_%s' % t
['id'] if len(thumbnails
) > 1 else '' 
2400             thumb_display_id 
= '%s ' % t
['id'] if len(thumbnails
) > 1 else '' 
2401             t
['filename'] = thumb_filename 
= os
.path
.splitext(filename
)[0] + suffix 
+ '.' + thumb_ext
 
2403             if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(thumb_filename
)): 
2404                 self
.to_screen('[%s] %s: Thumbnail %sis already present' % 
2405                                (info_dict
['extractor'], info_dict
['id'], thumb_display_id
)) 
2407                 self
.to_screen('[%s] %s: Downloading thumbnail %s...' % 
2408                                (info_dict
['extractor'], info_dict
['id'], thumb_display_id
)) 
2410                     uf 
= self
.urlopen(t
['url']) 
2411                     with open(encodeFilename(thumb_filename
), 'wb') as thumbf
: 
2412                         shutil
.copyfileobj(uf
, thumbf
) 
2413                     self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' % 
2414                                    (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
)) 
2415                 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
2416                     self
.report_warning('Unable to download thumbnail "%s": %s' % 
2417                                         (t
['url'], error_to_compat_str(err
)))