4 from __future__ 
import absolute_import
, unicode_literals
 
  29 from string 
import ascii_letters
 
  34     compat_get_terminal_size
, 
  40     compat_tokenize_tokenize
, 
  42     compat_urllib_request
, 
  43     compat_urllib_request_DataHandler
, 
  71     PerRequestProxyHandler
, 
  76     register_socks_protocols
, 
  86     UnavailableVideoError
, 
  91     YoutubeDLCookieProcessor
, 
  94 from .cache 
import Cache
 
  95 from .extractor 
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
 
  96 from .extractor
.openload 
import PhantomJSwrapper
 
  97 from .downloader 
import get_suitable_downloader
 
  98 from .downloader
.rtmp 
import rtmpdump_version
 
  99 from .postprocessor 
import ( 
 102     FFmpegFixupStretchedPP
, 
 107 from .version 
import __version__
 
 109 if compat_os_name 
== 'nt': 
 113 class YoutubeDL(object): 
 116     YoutubeDL objects are the ones responsible of downloading the 
 117     actual video file and writing it to disk if the user has requested 
 118     it, among some other tasks. In most cases there should be one per 
 119     program. As, given a video URL, the downloader doesn't know how to 
 120     extract all the needed information, task that InfoExtractors do, it 
 121     has to pass the URL to one of them. 
 123     For this, YoutubeDL objects have a method that allows 
 124     InfoExtractors to be registered in a given order. When it is passed 
 125     a URL, the YoutubeDL object handles it to the first InfoExtractor it 
 126     finds that reports being able to handle it. The InfoExtractor extracts 
 127     all the information about the video or videos the URL refers to, and 
 128     YoutubeDL processes the extracted information, possibly using a File 
 129     Downloader to download the video. 
 131     YoutubeDL objects accept a lot of parameters. In order not to saturate 
 132     the object constructor with arguments, it receives a dictionary of 
 133     options instead. These options are available through the params 
 134     attribute for the InfoExtractors to use. The YoutubeDL also 
 135     registers itself as the downloader in charge for the InfoExtractors 
 136     that are added to it, so this is a "mutual registration". 
 140     username:          Username for authentication purposes. 
 141     password:          Password for authentication purposes. 
 142     videopassword:     Password for accessing a video. 
 143     ap_mso:            Adobe Pass multiple-system operator identifier. 
 144     ap_username:       Multiple-system operator account username. 
 145     ap_password:       Multiple-system operator account password. 
 146     usenetrc:          Use netrc for authentication instead. 
 147     verbose:           Print additional info to stdout. 
 148     quiet:             Do not print messages to stdout. 
 149     no_warnings:       Do not print out anything for warnings. 
 150     forceurl:          Force printing final URL. 
 151     forcetitle:        Force printing title. 
 152     forceid:           Force printing ID. 
 153     forcethumbnail:    Force printing thumbnail URL. 
 154     forcedescription:  Force printing description. 
 155     forcefilename:     Force printing final filename. 
 156     forceduration:     Force printing duration. 
 157     forcejson:         Force printing info_dict as JSON. 
 158     dump_single_json:  Force printing the info_dict of the whole playlist 
 159                        (or video) as a single JSON line. 
 160     simulate:          Do not download the video files. 
 161     format:            Video format code. See options.py for more information. 
 162     outtmpl:           Template for output names. 
 163     restrictfilenames: Do not allow "&" and spaces in file names 
 164     ignoreerrors:      Do not stop on download errors. 
 165     force_generic_extractor: Force downloader to use the generic extractor 
 166     nooverwrites:      Prevent overwriting files. 
 167     playliststart:     Playlist item to start at. 
 168     playlistend:       Playlist item to end at. 
 169     playlist_items:    Specific indices of playlist to download. 
 170     playlistreverse:   Download playlist items in reverse order. 
 171     playlistrandom:    Download playlist items in random order. 
 172     matchtitle:        Download only matching titles. 
 173     rejecttitle:       Reject downloads for matching titles. 
 174     logger:            Log messages to a logging.Logger instance. 
 175     logtostderr:       Log messages to stderr instead of stdout. 
 176     writedescription:  Write the video description to a .description file 
 177     writeinfojson:     Write the video description to a .info.json file 
 178     writeannotations:  Write the video annotations to a .annotations.xml file 
 179     writethumbnail:    Write the thumbnail image to a file 
 180     write_all_thumbnails:  Write all thumbnail formats to files 
 181     writesubtitles:    Write the video subtitles to a file 
 182     writeautomaticsub: Write the automatically generated subtitles to a file 
 183     allsubtitles:      Downloads all the subtitles of the video 
 184                        (requires writesubtitles or writeautomaticsub) 
 185     listsubtitles:     Lists all available subtitles for the video 
 186     subtitlesformat:   The format code for subtitles 
 187     subtitleslangs:    List of languages of the subtitles to download 
 188     keepvideo:         Keep the video file after post-processing 
 189     daterange:         A DateRange object, download only if the upload_date is in the range. 
 190     skip_download:     Skip the actual download of the video file 
 191     cachedir:          Location of the cache files in the filesystem. 
 192                        False to disable filesystem cache. 
 193     noplaylist:        Download single video instead of a playlist if in doubt. 
 194     age_limit:         An integer representing the user's age in years. 
 195                        Unsuitable videos for the given age are skipped. 
 196     min_views:         An integer representing the minimum view count the video 
 197                        must have in order to not be skipped. 
 198                        Videos without view count information are always 
 199                        downloaded. None for no limit. 
 200     max_views:         An integer representing the maximum view count. 
 201                        Videos that are more popular than that are not 
 203                        Videos without view count information are always 
 204                        downloaded. None for no limit. 
 205     download_archive:  File name of a file where all downloads are recorded. 
 206                        Videos already present in the file are not downloaded 
 208     cookiefile:        File name where cookies should be read from and dumped to. 
 209     nocheckcertificate:Do not verify SSL certificates 
 210     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information. 
 211                        At the moment, this is only supported by YouTube. 
 212     proxy:             URL of the proxy server to use 
 213     geo_verification_proxy:  URL of the proxy to use for IP address verification 
 214                        on geo-restricted sites. (Experimental) 
 215     socket_timeout:    Time to wait for unresponsive hosts, in seconds 
 216     bidi_workaround:   Work around buggy terminals without bidirectional text 
 217                        support, using fribidi 
 218     debug_printtraffic:Print out sent and received HTTP traffic 
 219     include_ads:       Download ads as well 
 220     default_search:    Prepend this string if an input url is not valid. 
 221                        'auto' for elaborate guessing 
 222     encoding:          Use this encoding instead of the system-specified. 
 223     extract_flat:      Do not resolve URLs, return the immediate result. 
 224                        Pass in 'in_playlist' to only show this behavior for 
 226     postprocessors:    A list of dictionaries, each with an entry 
 227                        * key:  The name of the postprocessor. See 
 228                                youtube_dl/postprocessor/__init__.py for a list. 
 229                        as well as any further keyword arguments for the 
 231     progress_hooks:    A list of functions that get called on download 
 232                        progress, with a dictionary with the entries 
 233                        * status: One of "downloading", "error", or "finished". 
 234                                  Check this first and ignore unknown values. 
 236                        If status is one of "downloading", or "finished", the 
 237                        following properties may also be present: 
 238                        * filename: The final filename (always present) 
 239                        * tmpfilename: The filename we're currently writing to 
 240                        * downloaded_bytes: Bytes on disk 
 241                        * total_bytes: Size of the whole file, None if unknown 
 242                        * total_bytes_estimate: Guess of the eventual file size, 
 244                        * elapsed: The number of seconds since download started. 
 245                        * eta: The estimated time in seconds, None if unknown 
 246                        * speed: The download speed in bytes/second, None if 
 248                        * fragment_index: The counter of the currently 
 249                                          downloaded video fragment. 
 250                        * fragment_count: The number of fragments (= individual 
 251                                          files that will be merged) 
 253                        Progress hooks are guaranteed to be called at least once 
 254                        (with status "finished") if the download is successful. 
 255     merge_output_format: Extension to use when merging formats. 
 256     fixup:             Automatically correct known faults of the file. 
 258                        - "never": do nothing 
 259                        - "warn": only emit a warning 
 260                        - "detect_or_warn": check whether we can do anything 
 261                                            about it, warn otherwise (default) 
 262     source_address:    (Experimental) Client-side IP address to bind to. 
 263     call_home:         Boolean, true iff we are allowed to contact the 
 264                        youtube-dl servers for debugging. 
 265     sleep_interval:    Number of seconds to sleep before each download when 
 266                        used alone or a lower bound of a range for randomized 
 267                        sleep before each download (minimum possible number 
 268                        of seconds to sleep) when used along with 
 270     max_sleep_interval:Upper bound of a range for randomized sleep before each 
 271                        download (maximum possible number of seconds to sleep). 
 272                        Must only be used along with sleep_interval. 
 273                        Actual sleep time will be a random float from range 
 274                        [sleep_interval; max_sleep_interval]. 
 275     listformats:       Print an overview of available video formats and exit. 
 276     list_thumbnails:   Print a table of all thumbnails and exit. 
 277     match_filter:      A function that gets called with the info_dict of 
 279                        If it returns a message, the video is ignored. 
 280                        If it returns None, the video is downloaded. 
 281                        match_filter_func in utils.py is one example for this. 
 282     no_color:          Do not emit color codes in output. 
 283     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For 
 284                        HTTP header (experimental) 
 286                        Two-letter ISO 3166-2 country code that will be used for 
 287                        explicit geographic restriction bypassing via faking 
 288                        X-Forwarded-For HTTP header (experimental) 
 290     The following options determine which downloader is picked: 
 291     external_downloader: Executable of the external downloader to call. 
 292                        None or unset for standard (built-in) downloader. 
 293     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv 
 294                        if True, otherwise use ffmpeg/avconv if False, otherwise 
 295                        use downloader suggested by extractor if None. 
 297     The following parameters are not used by YoutubeDL itself, they are used by 
 298     the downloader (see youtube_dl/downloader/common.py): 
 299     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, 
 300     noresizebuffer, retries, continuedl, noprogress, consoletitle, 
 301     xattr_set_filesize, external_downloader_args, hls_use_mpegts. 
 303     The following options are used by the post processors: 
 304     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available, 
 305                        otherwise prefer avconv. 
 306     postprocessor_args: A list of additional command-line arguments for the 
 309     The following options are used by the Youtube extractor: 
 310     youtube_include_dash_manifest: If True (default), DASH manifests and related 
 311                         data will be downloaded and processed by extractor. 
 312                         You can reduce network I/O by disabling it if you don't 
 316     _NUMERIC_FIELDS 
= set(( 
 317         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 
 318         'timestamp', 'upload_year', 'upload_month', 'upload_day', 
 319         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 
 320         'average_rating', 'comment_count', 'age_limit', 
 321         'start_time', 'end_time', 
 322         'chapter_number', 'season_number', 'episode_number', 
 323         'track_number', 'disc_number', 'release_year', 
 330     _download_retcode 
= None 
 331     _num_downloads 
= None 
 334     def __init__(self
, params
=None, auto_init
=True): 
 335         """Create a FileDownloader object with the given options.""" 
 339         self
._ies
_instances 
= {} 
 341         self
._progress
_hooks 
= [] 
 342         self
._download
_retcode 
= 0 
 343         self
._num
_downloads 
= 0 
 344         self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 345         self
._err
_file 
= sys
.stderr
 
 348             'nocheckcertificate': False, 
 350         self
.params
.update(params
) 
 351         self
.cache 
= Cache(self
) 
 353         def check_deprecated(param
, option
, suggestion
): 
 354             if self
.params
.get(param
) is not None: 
 356                     '%s is deprecated. Use %s instead.' % (option
, suggestion
)) 
 360         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): 
 361             if self
.params
.get('geo_verification_proxy') is None: 
 362                 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy'] 
 364         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') 
 365         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') 
 366         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') 
 368         if params
.get('bidi_workaround', False): 
 371                 master
, slave 
= pty
.openpty() 
 372                 width 
= compat_get_terminal_size().columns
 
 376                     width_args 
= ['-w', str(width
)] 
 378                     stdin
=subprocess
.PIPE
, 
 380                     stderr
=self
._err
_file
) 
 382                     self
._output
_process 
= subprocess
.Popen( 
 383                         ['bidiv'] + width_args
, **sp_kwargs
 
 386                     self
._output
_process 
= subprocess
.Popen( 
 387                         ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
) 
 388                 self
._output
_channel 
= os
.fdopen(master
, 'rb') 
 389             except OSError as ose
: 
 390                 if ose
.errno 
== errno
.ENOENT
: 
 391                     self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.') 
 395         if (sys
.platform 
!= 'win32' and 
 396                 sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and 
 397                 not params
.get('restrictfilenames', False)): 
 398             # Unicode filesystem API will throw errors (#1474, #13027) 
 400                 'Assuming --restrict-filenames since file system encoding ' 
 401                 'cannot encode all characters. ' 
 402                 'Set the LC_ALL environment variable to fix this.') 
 403             self
.params
['restrictfilenames'] = True 
 405         if isinstance(params
.get('outtmpl'), bytes): 
 407                 'Parameter outtmpl is bytes, but should be a unicode string. ' 
 408                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.') 
 413             self
.print_debug_header() 
 414             self
.add_default_info_extractors() 
 416         for pp_def_raw 
in self
.params
.get('postprocessors', []): 
 417             pp_class 
= get_postprocessor(pp_def_raw
['key']) 
 418             pp_def 
= dict(pp_def_raw
) 
 420             pp 
= pp_class(self
, **compat_kwargs(pp_def
)) 
 421             self
.add_post_processor(pp
) 
 423         for ph 
in self
.params
.get('progress_hooks', []): 
 424             self
.add_progress_hook(ph
) 
 426         register_socks_protocols() 
 428     def warn_if_short_id(self
, argv
): 
 429         # short YouTube ID starting with dash? 
 431             i 
for i
, a 
in enumerate(argv
) 
 432             if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)] 
 436                 [a 
for i
, a 
in enumerate(argv
) if i 
not in idxs
] + 
 437                 ['--'] + [argv
[i
] for i 
in idxs
] 
 440                 'Long argument string detected. ' 
 441                 'Use -- to separate parameters and URLs, like this:\n%s\n' % 
 442                 args_to_str(correct_argv
)) 
 444     def add_info_extractor(self
, ie
): 
 445         """Add an InfoExtractor object to the end of the list.""" 
 447         if not isinstance(ie
, type): 
 448             self
._ies
_instances
[ie
.ie_key()] = ie
 
 449             ie
.set_downloader(self
) 
 451     def get_info_extractor(self
, ie_key
): 
 453         Get an instance of an IE with name ie_key, it will try to get one from 
 454         the _ies list, if there's no instance it will create a new one and add 
 455         it to the extractor list. 
 457         ie 
= self
._ies
_instances
.get(ie_key
) 
 459             ie 
= get_info_extractor(ie_key
)() 
 460             self
.add_info_extractor(ie
) 
 463     def add_default_info_extractors(self
): 
 465         Add the InfoExtractors returned by gen_extractors to the end of the list 
 467         for ie 
in gen_extractor_classes(): 
 468             self
.add_info_extractor(ie
) 
 470     def add_post_processor(self
, pp
): 
 471         """Add a PostProcessor object to the end of the chain.""" 
 473         pp
.set_downloader(self
) 
 475     def add_progress_hook(self
, ph
): 
 476         """Add the progress hook (currently only for the file downloader)""" 
 477         self
._progress
_hooks
.append(ph
) 
 479     def _bidi_workaround(self
, message
): 
 480         if not hasattr(self
, '_output_channel'): 
 483         assert hasattr(self
, '_output_process') 
 484         assert isinstance(message
, compat_str
) 
 485         line_count 
= message
.count('\n') + 1 
 486         self
._output
_process
.stdin
.write((message 
+ '\n').encode('utf-8')) 
 487         self
._output
_process
.stdin
.flush() 
 488         res 
= ''.join(self
._output
_channel
.readline().decode('utf-8') 
 489                       for _ 
in range(line_count
)) 
 490         return res
[:-len('\n')] 
 492     def to_screen(self
, message
, skip_eol
=False): 
 493         """Print message to stdout if not in quiet mode.""" 
 494         return self
.to_stdout(message
, skip_eol
, check_quiet
=True) 
 496     def _write_string(self
, s
, out
=None): 
 497         write_string(s
, out
=out
, encoding
=self
.params
.get('encoding')) 
 499     def to_stdout(self
, message
, skip_eol
=False, check_quiet
=False): 
 500         """Print message to stdout if not in quiet mode.""" 
 501         if self
.params
.get('logger'): 
 502             self
.params
['logger'].debug(message
) 
 503         elif not check_quiet 
or not self
.params
.get('quiet', False): 
 504             message 
= self
._bidi
_workaround
(message
) 
 505             terminator 
= ['\n', ''][skip_eol
] 
 506             output 
= message 
+ terminator
 
 508             self
._write
_string
(output
, self
._screen
_file
) 
 510     def to_stderr(self
, message
): 
 511         """Print message to stderr.""" 
 512         assert isinstance(message
, compat_str
) 
 513         if self
.params
.get('logger'): 
 514             self
.params
['logger'].error(message
) 
 516             message 
= self
._bidi
_workaround
(message
) 
 517             output 
= message 
+ '\n' 
 518             self
._write
_string
(output
, self
._err
_file
) 
 520     def to_console_title(self
, message
): 
 521         if not self
.params
.get('consoletitle', False): 
 523         if compat_os_name 
== 'nt': 
 524             if ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 525                 # c_wchar_p() might not be necessary if `message` is 
 526                 # already of type unicode() 
 527                 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 528         elif 'TERM' in os
.environ
: 
 529             self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
) 
 531     def save_console_title(self
): 
 532         if not self
.params
.get('consoletitle', False): 
 534         if compat_os_name 
!= 'nt' and 'TERM' in os
.environ
: 
 535             # Save the title on stack 
 536             self
._write
_string
('\033[22;0t', self
._screen
_file
) 
 538     def restore_console_title(self
): 
 539         if not self
.params
.get('consoletitle', False): 
 541         if compat_os_name 
!= 'nt' and 'TERM' in os
.environ
: 
 542             # Restore the title from stack 
 543             self
._write
_string
('\033[23;0t', self
._screen
_file
) 
 546         self
.save_console_title() 
 549     def __exit__(self
, *args
): 
 550         self
.restore_console_title() 
 552         if self
.params
.get('cookiefile') is not None: 
 553             self
.cookiejar
.save() 
 555     def trouble(self
, message
=None, tb
=None): 
 556         """Determine action to take when a download problem appears. 
 558         Depending on if the downloader has been configured to ignore 
 559         download errors or not, this method may throw an exception or 
 560         not when errors are found, after printing the message. 
 562         tb, if given, is additional traceback information. 
 564         if message 
is not None: 
 565             self
.to_stderr(message
) 
 566         if self
.params
.get('verbose'): 
 568                 if sys
.exc_info()[0]:  # if .trouble has been called from an except block 
 570                     if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]: 
 571                         tb 
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
)) 
 572                     tb 
+= encode_compat_str(traceback
.format_exc()) 
 574                     tb_data 
= traceback
.format_list(traceback
.extract_stack()) 
 575                     tb 
= ''.join(tb_data
) 
 577         if not self
.params
.get('ignoreerrors', False): 
 578             if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]: 
 579                 exc_info 
= sys
.exc_info()[1].exc_info
 
 581                 exc_info 
= sys
.exc_info() 
 582             raise DownloadError(message
, exc_info
) 
 583         self
._download
_retcode 
= 1 
 585     def report_warning(self
, message
): 
 587         Print the message to stderr, it will be prefixed with 'WARNING:' 
 588         If stderr is a tty file the 'WARNING:' will be colored 
 590         if self
.params
.get('logger') is not None: 
 591             self
.params
['logger'].warning(message
) 
 593             if self
.params
.get('no_warnings'): 
 595             if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name 
!= 'nt': 
 596                 _msg_header 
= '\033[0;33mWARNING:\033[0m' 
 598                 _msg_header 
= 'WARNING:' 
 599             warning_message 
= '%s %s' % (_msg_header
, message
) 
 600             self
.to_stderr(warning_message
) 
 602     def report_error(self
, message
, tb
=None): 
 604         Do the same as trouble, but prefixes the message with 'ERROR:', colored 
 605         in red if stderr is a tty file. 
 607         if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name 
!= 'nt': 
 608             _msg_header 
= '\033[0;31mERROR:\033[0m' 
 610             _msg_header 
= 'ERROR:' 
 611         error_message 
= '%s %s' % (_msg_header
, message
) 
 612         self
.trouble(error_message
, tb
) 
 614     def report_file_already_downloaded(self
, file_name
): 
 615         """Report file has already been fully downloaded.""" 
 617             self
.to_screen('[download] %s has already been downloaded' % file_name
) 
 618         except UnicodeEncodeError: 
 619             self
.to_screen('[download] The file has already been downloaded') 
 621     def prepare_filename(self
, info_dict
): 
 622         """Generate the output filename.""" 
 624             template_dict 
= dict(info_dict
) 
 626             template_dict
['epoch'] = int(time
.time()) 
 627             autonumber_size 
= self
.params
.get('autonumber_size') 
 628             if autonumber_size 
is None: 
 630             template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
 
 631             if template_dict
.get('resolution') is None: 
 632                 if template_dict
.get('width') and template_dict
.get('height'): 
 633                     template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height']) 
 634                 elif template_dict
.get('height'): 
 635                     template_dict
['resolution'] = '%sp' % template_dict
['height'] 
 636                 elif template_dict
.get('width'): 
 637                     template_dict
['resolution'] = '%dx?' % template_dict
['width'] 
 639             sanitize 
= lambda k
, v
: sanitize_filename( 
 641                 restricted
=self
.params
.get('restrictfilenames'), 
 642                 is_id
=(k 
== 'id' or k
.endswith('_id'))) 
 643             template_dict 
= dict((k
, v 
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
)) 
 644                                  for k
, v 
in template_dict
.items() 
 645                                  if v 
is not None and not isinstance(v
, (list, tuple, dict))) 
 646             template_dict 
= collections
.defaultdict(lambda: 'NA', template_dict
) 
 648             outtmpl 
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) 
 650             # For fields playlist_index and autonumber convert all occurrences 
 651             # of %(field)s to %(field)0Nd for backward compatibility 
 652             field_size_compat_map 
= { 
 653                 'playlist_index': len(str(template_dict
['n_entries'])), 
 654                 'autonumber': autonumber_size
, 
 656             FIELD_SIZE_COMPAT_RE 
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s' 
 657             mobj 
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
) 
 660                     FIELD_SIZE_COMPAT_RE
, 
 661                     r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')], 
 664             # Missing numeric fields used together with integer presentation types 
 665             # in format specification will break the argument substitution since 
 666             # string 'NA' is returned for missing fields. We will patch output 
 667             # template for missing fields to meet string presentation type. 
 668             for numeric_field 
in self
._NUMERIC
_FIELDS
: 
 669                 if numeric_field 
not in template_dict
: 
 670                     # As of [1] format syntax is: 
 671                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type 
 672                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting 
 676                         \({0}\)  # mapping key 
 677                         (?:[#0\-+ ]+)?  # conversion flags (optional) 
 678                         (?:\d+)?  # minimum field width (optional) 
 679                         (?:\.\d+)?  # precision (optional) 
 680                         [hlL]?  # length modifier (optional) 
 681                         [diouxXeEfFgGcrs%]  # conversion type 
 684                         FORMAT_RE
.format(numeric_field
), 
 685                         r
'%({0})s'.format(numeric_field
), outtmpl
) 
 687             # expand_path translates '%%' into '%' and '$$' into '$' 
 688             # correspondingly that is not what we want since we need to keep 
 689             # '%%' intact for template dict substitution step. Working around 
 690             # with boundary-alike separator hack. 
 691             sep 
= ''.join([random
.choice(ascii_letters
) for _ 
in range(32)]) 
 692             outtmpl 
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
)) 
 694             # outtmpl should be expand_path'ed before template dict substitution 
 695             # because meta fields may contain env variables we don't want to 
 696             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and 
 697             # title "Hello $PATH", we don't want `$PATH` to be expanded. 
 698             filename 
= expand_path(outtmpl
).replace(sep
, '') % template_dict
 
 700             # Temporary fix for #4787 
 701             # 'Treat' all problem characters by passing filename through preferredencoding 
 702             # to workaround encoding issues with subprocess on python2 @ Windows 
 703             if sys
.version_info 
< (3, 0) and sys
.platform 
== 'win32': 
 704                 filename 
= encodeFilename(filename
, True).decode(preferredencoding()) 
 705             return sanitize_path(filename
) 
 706         except ValueError as err
: 
 707             self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')') 
 710     def _match_entry(self
, info_dict
, incomplete
): 
 711         """ Returns None iff the file should be downloaded """ 
 713         video_title 
= info_dict
.get('title', info_dict
.get('id', 'video')) 
 714         if 'title' in info_dict
: 
 715             # This can happen when we're just evaluating the playlist 
 716             title 
= info_dict
['title'] 
 717             matchtitle 
= self
.params
.get('matchtitle', False) 
 719                 if not re
.search(matchtitle
, title
, re
.IGNORECASE
): 
 720                     return '"' + title 
+ '" title did not match pattern "' + matchtitle 
+ '"' 
 721             rejecttitle 
= self
.params
.get('rejecttitle', False) 
 723                 if re
.search(rejecttitle
, title
, re
.IGNORECASE
): 
 724                     return '"' + title 
+ '" title matched reject pattern "' + rejecttitle 
+ '"' 
 725         date 
= info_dict
.get('upload_date') 
 727             dateRange 
= self
.params
.get('daterange', DateRange()) 
 728             if date 
not in dateRange
: 
 729                 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
) 
 730         view_count 
= info_dict
.get('view_count') 
 731         if view_count 
is not None: 
 732             min_views 
= self
.params
.get('min_views') 
 733             if min_views 
is not None and view_count 
< min_views
: 
 734                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
) 
 735             max_views 
= self
.params
.get('max_views') 
 736             if max_views 
is not None and view_count 
> max_views
: 
 737                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
) 
 738         if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')): 
 739             return 'Skipping "%s" because it is age restricted' % video_title
 
 740         if self
.in_download_archive(info_dict
): 
 741             return '%s has already been recorded in archive' % video_title
 
 744             match_filter 
= self
.params
.get('match_filter') 
 745             if match_filter 
is not None: 
 746                 ret 
= match_filter(info_dict
) 
 753     def add_extra_info(info_dict
, extra_info
): 
 754         '''Set the keys from extra_info in info dict if they are missing''' 
 755         for key
, value 
in extra_info
.items(): 
 756             info_dict
.setdefault(key
, value
) 
 758     def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={}, 
 759                      process
=True, force_generic_extractor
=False): 
 761         Returns a list with a dictionary for each video we find. 
 762         If 'download', also downloads the videos. 
 763         extra_info is a dict containing the extra values to add to each result 
 766         if not ie_key 
and force_generic_extractor
: 
 770             ies 
= [self
.get_info_extractor(ie_key
)] 
 775             if not ie
.suitable(url
): 
 778             ie 
= self
.get_info_extractor(ie
.ie_key()) 
 780                 self
.report_warning('The program functionality for this site has been marked as broken, ' 
 781                                     'and will probably not work.') 
 784                 ie_result 
= ie
.extract(url
) 
 785                 if ie_result 
is None:  # Finished already (backwards compatibility; listformats and friends should be moved here) 
 787                 if isinstance(ie_result
, list): 
 788                     # Backwards compatibility: old IE result format 
 790                         '_type': 'compat_list', 
 791                         'entries': ie_result
, 
 793                 self
.add_default_extra_info(ie_result
, ie
, url
) 
 795                     return self
.process_ie_result(ie_result
, download
, extra_info
) 
 798             except GeoRestrictedError 
as e
: 
 801                     msg 
+= '\nThis video is available in %s.' % ', '.join( 
 802                         map(ISO3166Utils
.short2full
, e
.countries
)) 
 803                 msg 
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' 
 804                 self
.report_error(msg
) 
 806             except ExtractorError 
as e
:  # An error we somewhat expected 
 807                 self
.report_error(compat_str(e
), e
.format_traceback()) 
 809             except MaxDownloadsReached
: 
 811             except Exception as e
: 
 812                 if self
.params
.get('ignoreerrors', False): 
 813                     self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc())) 
 818             self
.report_error('no suitable InfoExtractor for URL %s' % url
) 
 820     def add_default_extra_info(self
, ie_result
, ie
, url
): 
 821         self
.add_extra_info(ie_result
, { 
 822             'extractor': ie
.IE_NAME
, 
 824             'webpage_url_basename': url_basename(url
), 
 825             'extractor_key': ie
.ie_key(), 
 828     def process_ie_result(self
, ie_result
, download
=True, extra_info
={}): 
 830         Take the result of the ie(may be modified) and resolve all unresolved 
 831         references (URLs, playlist items). 
 833         It will also download the videos if 'download'. 
 834         Returns the resolved ie_result. 
 836         result_type 
= ie_result
.get('_type', 'video') 
 838         if result_type 
in ('url', 'url_transparent'): 
 839             ie_result
['url'] = sanitize_url(ie_result
['url']) 
 840             extract_flat 
= self
.params
.get('extract_flat', False) 
 841             if ((extract_flat 
== 'in_playlist' and 'playlist' in extra_info
) or 
 842                     extract_flat 
is True): 
 843                 if self
.params
.get('forcejson', False): 
 844                     self
.to_stdout(json
.dumps(ie_result
)) 
 847         if result_type 
== 'video': 
 848             self
.add_extra_info(ie_result
, extra_info
) 
 849             return self
.process_video_result(ie_result
, download
=download
) 
 850         elif result_type 
== 'url': 
 851             # We have to add extra_info to the results because it may be 
 852             # contained in a playlist 
 853             return self
.extract_info(ie_result
['url'], 
 855                                      ie_key
=ie_result
.get('ie_key'), 
 856                                      extra_info
=extra_info
) 
 857         elif result_type 
== 'url_transparent': 
 858             # Use the information from the embedding page 
 859             info 
= self
.extract_info( 
 860                 ie_result
['url'], ie_key
=ie_result
.get('ie_key'), 
 861                 extra_info
=extra_info
, download
=False, process
=False) 
 863             # extract_info may return None when ignoreerrors is enabled and 
 864             # extraction failed with an error, don't crash and return early 
 869             force_properties 
= dict( 
 870                 (k
, v
) for k
, v 
in ie_result
.items() if v 
is not None) 
 871             for f 
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'): 
 872                 if f 
in force_properties
: 
 873                     del force_properties
[f
] 
 874             new_result 
= info
.copy() 
 875             new_result
.update(force_properties
) 
 877             # Extracted info may not be a video result (i.e. 
 878             # info.get('_type', 'video') != video) but rather an url or 
 879             # url_transparent. In such cases outer metadata (from ie_result) 
 880             # should be propagated to inner one (info). For this to happen 
 881             # _type of info should be overridden with url_transparent. This 
 882             # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. 
 883             if new_result
.get('_type') == 'url': 
 884                 new_result
['_type'] = 'url_transparent' 
 886             return self
.process_ie_result( 
 887                 new_result
, download
=download
, extra_info
=extra_info
) 
 888         elif result_type 
in ('playlist', 'multi_video'): 
 889             # We process each entry in the playlist 
 890             playlist 
= ie_result
.get('title') or ie_result
.get('id') 
 891             self
.to_screen('[download] Downloading playlist: %s' % playlist
) 
 893             playlist_results 
= [] 
 895             playliststart 
= self
.params
.get('playliststart', 1) - 1 
 896             playlistend 
= self
.params
.get('playlistend') 
 897             # For backwards compatibility, interpret -1 as whole list 
 898             if playlistend 
== -1: 
 901             playlistitems_str 
= self
.params
.get('playlist_items') 
 903             if playlistitems_str 
is not None: 
 904                 def iter_playlistitems(format
): 
 905                     for string_segment 
in format
.split(','): 
 906                         if '-' in string_segment
: 
 907                             start
, end 
= string_segment
.split('-') 
 908                             for item 
in range(int(start
), int(end
) + 1): 
 911                             yield int(string_segment
) 
 912                 playlistitems 
= orderedSet(iter_playlistitems(playlistitems_str
)) 
 914             ie_entries 
= ie_result
['entries'] 
 916             def make_playlistitems_entries(list_ie_entries
): 
 917                 num_entries 
= len(list_ie_entries
) 
 919                     list_ie_entries
[i 
- 1] for i 
in playlistitems
 
 920                     if -num_entries 
<= i 
- 1 < num_entries
] 
 922             def report_download(num_entries
): 
 924                     '[%s] playlist %s: Downloading %d videos' % 
 925                     (ie_result
['extractor'], playlist
, num_entries
)) 
 927             if isinstance(ie_entries
, list): 
 928                 n_all_entries 
= len(ie_entries
) 
 930                     entries 
= make_playlistitems_entries(ie_entries
) 
 932                     entries 
= ie_entries
[playliststart
:playlistend
] 
 933                 n_entries 
= len(entries
) 
 935                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % 
 936                     (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
)) 
 937             elif isinstance(ie_entries
, PagedList
): 
 940                     for item 
in playlistitems
: 
 941                         entries
.extend(ie_entries
.getslice( 
 945                     entries 
= ie_entries
.getslice( 
 946                         playliststart
, playlistend
) 
 947                 n_entries 
= len(entries
) 
 948                 report_download(n_entries
) 
 951                     entries 
= make_playlistitems_entries(list(itertools
.islice( 
 952                         ie_entries
, 0, max(playlistitems
)))) 
 954                     entries 
= list(itertools
.islice( 
 955                         ie_entries
, playliststart
, playlistend
)) 
 956                 n_entries 
= len(entries
) 
 957                 report_download(n_entries
) 
 959             if self
.params
.get('playlistreverse', False): 
 960                 entries 
= entries
[::-1] 
 962             if self
.params
.get('playlistrandom', False): 
 963                 random
.shuffle(entries
) 
 965             x_forwarded_for 
= ie_result
.get('__x_forwarded_for_ip') 
 967             for i
, entry 
in enumerate(entries
, 1): 
 968                 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
)) 
 969                 # This __x_forwarded_for_ip thing is a bit ugly but requires 
 972                     entry
['__x_forwarded_for_ip'] = x_forwarded_for
 
 974                     'n_entries': n_entries
, 
 975                     'playlist': playlist
, 
 976                     'playlist_id': ie_result
.get('id'), 
 977                     'playlist_title': ie_result
.get('title'), 
 978                     'playlist_uploader': ie_result
.get('uploader'), 
 979                     'playlist_uploader_id': ie_result
.get('uploader_id'), 
 980                     'playlist_index': i 
+ playliststart
, 
 981                     'extractor': ie_result
['extractor'], 
 982                     'webpage_url': ie_result
['webpage_url'], 
 983                     'webpage_url_basename': url_basename(ie_result
['webpage_url']), 
 984                     'extractor_key': ie_result
['extractor_key'], 
 987                 reason 
= self
._match
_entry
(entry
, incomplete
=True) 
 988                 if reason 
is not None: 
 989                     self
.to_screen('[download] ' + reason
) 
 992                 entry_result 
= self
.process_ie_result(entry
, 
 995                 playlist_results
.append(entry_result
) 
 996             ie_result
['entries'] = playlist_results
 
 997             self
.to_screen('[download] Finished downloading playlist: %s' % playlist
) 
 999         elif result_type 
== 'compat_list': 
1000             self
.report_warning( 
1001                 'Extractor %s returned a compat_list result. ' 
1002                 'It needs to be updated.' % ie_result
.get('extractor')) 
1005                 self
.add_extra_info( 
1008                         'extractor': ie_result
['extractor'], 
1009                         'webpage_url': ie_result
['webpage_url'], 
1010                         'webpage_url_basename': url_basename(ie_result
['webpage_url']), 
1011                         'extractor_key': ie_result
['extractor_key'], 
1015             ie_result
['entries'] = [ 
1016                 self
.process_ie_result(_fixup(r
), download
, extra_info
) 
1017                 for r 
in ie_result
['entries'] 
1021             raise Exception('Invalid result type: %s' % result_type
) 
1023     def _build_format_filter(self
, filter_spec
): 
1024         " Returns a function to filter the formats according to the filter_spec " 
1034         operator_rex 
= re
.compile(r
'''(?x)\s* 
1035             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps) 
1036             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* 
1037             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) 
1039             ''' % '|'.join(map(re
.escape
, OPERATORS
.keys()))) 
1040         m 
= operator_rex
.search(filter_spec
) 
1043                 comparison_value 
= int(m
.group('value')) 
1045                 comparison_value 
= parse_filesize(m
.group('value')) 
1046                 if comparison_value 
is None: 
1047                     comparison_value 
= parse_filesize(m
.group('value') + 'B') 
1048                 if comparison_value 
is None: 
1050                         'Invalid value %r in format specification %r' % ( 
1051                             m
.group('value'), filter_spec
)) 
1052             op 
= OPERATORS
[m
.group('op')] 
1058                 '^=': lambda attr
, value
: attr
.startswith(value
), 
1059                 '$=': lambda attr
, value
: attr
.endswith(value
), 
1060                 '*=': lambda attr
, value
: value 
in attr
, 
1062             str_operator_rex 
= re
.compile(r
'''(?x) 
1063                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id) 
1064                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? 
1065                 \s*(?P<value>[a-zA-Z0-9._-]+) 
1067                 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys()))) 
1068             m 
= str_operator_rex
.search(filter_spec
) 
1070                 comparison_value 
= m
.group('value') 
1071                 op 
= STR_OPERATORS
[m
.group('op')] 
1074             raise ValueError('Invalid filter specification %r' % filter_spec
) 
1077             actual_value 
= f
.get(m
.group('key')) 
1078             if actual_value 
is None: 
1079                 return m
.group('none_inclusive') 
1080             return op(actual_value
, comparison_value
) 
1083     def _default_format_spec(self
, info_dict
, download
=True): 
1086             merger 
= FFmpegMergerPP(self
) 
1087             return merger
.available 
and merger
.can_merge() 
1090             if self
.params
.get('simulate', False): 
1094             if self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) == '-': 
1096             if info_dict
.get('is_live'): 
1102         req_format_list 
= ['bestvideo+bestaudio', 'best'] 
1104             req_format_list
.reverse() 
1105         return '/'.join(req_format_list
) 
1107     def build_format_selector(self
, format_spec
): 
1108         def syntax_error(note
, start
): 
1110                 'Invalid format specification: ' 
1111                 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1])) 
1112             return SyntaxError(message
) 
1114         PICKFIRST 
= 'PICKFIRST' 
1118         FormatSelector 
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters']) 
1120         def _parse_filter(tokens
): 
1122             for type, string
, start
, _
, _ 
in tokens
: 
1123                 if type == tokenize
.OP 
and string 
== ']': 
1124                     return ''.join(filter_parts
) 
1126                     filter_parts
.append(string
) 
1128         def _remove_unused_ops(tokens
): 
1129             # Remove operators that we don't use and join them with the surrounding strings 
1130             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' 
1131             ALLOWED_OPS 
= ('/', '+', ',', '(', ')') 
1132             last_string
, last_start
, last_end
, last_line 
= None, None, None, None 
1133             for type, string
, start
, end
, line 
in tokens
: 
1134                 if type == tokenize
.OP 
and string 
== '[': 
1136                         yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
 
1138                     yield type, string
, start
, end
, line
 
1139                     # everything inside brackets will be handled by _parse_filter 
1140                     for type, string
, start
, end
, line 
in tokens
: 
1141                         yield type, string
, start
, end
, line
 
1142                         if type == tokenize
.OP 
and string 
== ']': 
1144                 elif type == tokenize
.OP 
and string 
in ALLOWED_OPS
: 
1146                         yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
 
1148                     yield type, string
, start
, end
, line
 
1149                 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]: 
1151                         last_string 
= string
 
1155                         last_string 
+= string
 
1157                 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
 
1159         def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False): 
1161             current_selector 
= None 
1162             for type, string
, start
, _
, _ 
in tokens
: 
1163                 # ENCODING is only defined in python 3.x 
1164                 if type == getattr(tokenize
, 'ENCODING', None): 
1166                 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]: 
1167                     current_selector 
= FormatSelector(SINGLE
, string
, []) 
1168                 elif type == tokenize
.OP
: 
1170                         if not inside_group
: 
1171                             # ')' will be handled by the parentheses group 
1172                             tokens
.restore_last_token() 
1174                     elif inside_merge 
and string 
in ['/', ',']: 
1175                         tokens
.restore_last_token() 
1177                     elif inside_choice 
and string 
== ',': 
1178                         tokens
.restore_last_token() 
1181                         if not current_selector
: 
1182                             raise syntax_error('"," must follow a format selector', start
) 
1183                         selectors
.append(current_selector
) 
1184                         current_selector 
= None 
1186                         if not current_selector
: 
1187                             raise syntax_error('"/" must follow a format selector', start
) 
1188                         first_choice 
= current_selector
 
1189                         second_choice 
= _parse_format_selection(tokens
, inside_choice
=True) 
1190                         current_selector 
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), []) 
1192                         if not current_selector
: 
1193                             current_selector 
= FormatSelector(SINGLE
, 'best', []) 
1194                         format_filter 
= _parse_filter(tokens
) 
1195                         current_selector
.filters
.append(format_filter
) 
1197                         if current_selector
: 
1198                             raise syntax_error('Unexpected "("', start
) 
1199                         group 
= _parse_format_selection(tokens
, inside_group
=True) 
1200                         current_selector 
= FormatSelector(GROUP
, group
, []) 
1202                         video_selector 
= current_selector
 
1203                         audio_selector 
= _parse_format_selection(tokens
, inside_merge
=True) 
1204                         if not video_selector 
or not audio_selector
: 
1205                             raise syntax_error('"+" must be between two format selectors', start
) 
1206                         current_selector 
= FormatSelector(MERGE
, (video_selector
, audio_selector
), []) 
1208                         raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
) 
1209                 elif type == tokenize
.ENDMARKER
: 
1211             if current_selector
: 
1212                 selectors
.append(current_selector
) 
1215         def _build_selector_function(selector
): 
1216             if isinstance(selector
, list): 
1217                 fs 
= [_build_selector_function(s
) for s 
in selector
] 
1219                 def selector_function(ctx
): 
1221                         for format 
in f(ctx
): 
1223                 return selector_function
 
1224             elif selector
.type == GROUP
: 
1225                 selector_function 
= _build_selector_function(selector
.selector
) 
1226             elif selector
.type == PICKFIRST
: 
1227                 fs 
= [_build_selector_function(s
) for s 
in selector
.selector
] 
1229                 def selector_function(ctx
): 
1231                         picked_formats 
= list(f(ctx
)) 
1233                             return picked_formats
 
1235             elif selector
.type == SINGLE
: 
1236                 format_spec 
= selector
.selector
 
1238                 def selector_function(ctx
): 
1239                     formats 
= list(ctx
['formats']) 
1242                     if format_spec 
== 'all': 
1245                     elif format_spec 
in ['best', 'worst', None]: 
1246                         format_idx 
= 0 if format_spec 
== 'worst' else -1 
1247                         audiovideo_formats 
= [ 
1249                             if f
.get('vcodec') != 'none' and f
.get('acodec') != 'none'] 
1250                         if audiovideo_formats
: 
1251                             yield audiovideo_formats
[format_idx
] 
1252                         # for extractors with incomplete formats (audio only (soundcloud) 
1253                         # or video only (imgur)) we will fallback to best/worst 
1254                         # {video,audio}-only format 
1255                         elif ctx
['incomplete_formats']: 
1256                             yield formats
[format_idx
] 
1257                     elif format_spec 
== 'bestaudio': 
1260                             if f
.get('vcodec') == 'none'] 
1262                             yield audio_formats
[-1] 
1263                     elif format_spec 
== 'worstaudio': 
1266                             if f
.get('vcodec') == 'none'] 
1268                             yield audio_formats
[0] 
1269                     elif format_spec 
== 'bestvideo': 
1272                             if f
.get('acodec') == 'none'] 
1274                             yield video_formats
[-1] 
1275                     elif format_spec 
== 'worstvideo': 
1278                             if f
.get('acodec') == 'none'] 
1280                             yield video_formats
[0] 
1282                         extensions 
= ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] 
1283                         if format_spec 
in extensions
: 
1284                             filter_f 
= lambda f
: f
['ext'] == format_spec
 
1286                             filter_f 
= lambda f
: f
['format_id'] == format_spec
 
1287                         matches 
= list(filter(filter_f
, formats
)) 
1290             elif selector
.type == MERGE
: 
1291                 def _merge(formats_info
): 
1292                     format_1
, format_2 
= [f
['format_id'] for f 
in formats_info
] 
1293                     # The first format must contain the video and the 
1295                     if formats_info
[0].get('vcodec') == 'none': 
1296                         self
.report_error('The first format must ' 
1297                                           'contain the video, try using ' 
1298                                           '"-f %s+%s"' % (format_2
, format_1
)) 
1300                     # Formats must be opposite (video+audio) 
1301                     if formats_info
[0].get('acodec') == 'none' and formats_info
[1].get('acodec') == 'none': 
1303                             'Both formats %s and %s are video-only, you must specify "-f video+audio"' 
1304                             % (format_1
, format_2
)) 
1307                         formats_info
[0]['ext'] 
1308                         if self
.params
.get('merge_output_format') is None 
1309                         else self
.params
['merge_output_format']) 
1311                         'requested_formats': formats_info
, 
1312                         'format': '%s+%s' % (formats_info
[0].get('format'), 
1313                                              formats_info
[1].get('format')), 
1314                         'format_id': '%s+%s' % (formats_info
[0].get('format_id'), 
1315                                                 formats_info
[1].get('format_id')), 
1316                         'width': formats_info
[0].get('width'), 
1317                         'height': formats_info
[0].get('height'), 
1318                         'resolution': formats_info
[0].get('resolution'), 
1319                         'fps': formats_info
[0].get('fps'), 
1320                         'vcodec': formats_info
[0].get('vcodec'), 
1321                         'vbr': formats_info
[0].get('vbr'), 
1322                         'stretched_ratio': formats_info
[0].get('stretched_ratio'), 
1323                         'acodec': formats_info
[1].get('acodec'), 
1324                         'abr': formats_info
[1].get('abr'), 
1327                 video_selector
, audio_selector 
= map(_build_selector_function
, selector
.selector
) 
1329                 def selector_function(ctx
): 
1330                     for pair 
in itertools
.product( 
1331                             video_selector(copy
.deepcopy(ctx
)), audio_selector(copy
.deepcopy(ctx
))): 
1334             filters 
= [self
._build
_format
_filter
(f
) for f 
in selector
.filters
] 
1336             def final_selector(ctx
): 
1337                 ctx_copy 
= copy
.deepcopy(ctx
) 
1338                 for _filter 
in filters
: 
1339                     ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats'])) 
1340                 return selector_function(ctx_copy
) 
1341             return final_selector
 
1343         stream 
= io
.BytesIO(format_spec
.encode('utf-8')) 
1345             tokens 
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
))) 
1346         except tokenize
.TokenError
: 
1347             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
))) 
1349         class TokenIterator(object): 
1350             def __init__(self
, tokens
): 
1351                 self
.tokens 
= tokens
 
1358                 if self
.counter 
>= len(self
.tokens
): 
1359                     raise StopIteration() 
1360                 value 
= self
.tokens
[self
.counter
] 
1366             def restore_last_token(self
): 
1369         parsed_selector 
= _parse_format_selection(iter(TokenIterator(tokens
))) 
1370         return _build_selector_function(parsed_selector
) 
1372     def _calc_headers(self
, info_dict
): 
1373         res 
= std_headers
.copy() 
1375         add_headers 
= info_dict
.get('http_headers') 
1377             res
.update(add_headers
) 
1379         cookies 
= self
._calc
_cookies
(info_dict
) 
1381             res
['Cookie'] = cookies
 
1383         if 'X-Forwarded-For' not in res
: 
1384             x_forwarded_for_ip 
= info_dict
.get('__x_forwarded_for_ip') 
1385             if x_forwarded_for_ip
: 
1386                 res
['X-Forwarded-For'] = x_forwarded_for_ip
 
1390     def _calc_cookies(self
, info_dict
): 
1391         pr 
= sanitized_Request(info_dict
['url']) 
1392         self
.cookiejar
.add_cookie_header(pr
) 
1393         return pr
.get_header('Cookie') 
1395     def process_video_result(self
, info_dict
, download
=True): 
1396         assert info_dict
.get('_type', 'video') == 'video' 
1398         if 'id' not in info_dict
: 
1399             raise ExtractorError('Missing "id" field in extractor result') 
1400         if 'title' not in info_dict
: 
1401             raise ExtractorError('Missing "title" field in extractor result') 
1403         def report_force_conversion(field
, field_not
, conversion
): 
1404             self
.report_warning( 
1405                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor' 
1406                 % (field
, field_not
, conversion
)) 
1408         def sanitize_string_field(info
, string_field
): 
1409             field 
= info
.get(string_field
) 
1410             if field 
is None or isinstance(field
, compat_str
): 
1412             report_force_conversion(string_field
, 'a string', 'string') 
1413             info
[string_field
] = compat_str(field
) 
1415         def sanitize_numeric_fields(info
): 
1416             for numeric_field 
in self
._NUMERIC
_FIELDS
: 
1417                 field 
= info
.get(numeric_field
) 
1418                 if field 
is None or isinstance(field
, compat_numeric_types
): 
1420                 report_force_conversion(numeric_field
, 'numeric', 'int') 
1421                 info
[numeric_field
] = int_or_none(field
) 
1423         sanitize_string_field(info_dict
, 'id') 
1424         sanitize_numeric_fields(info_dict
) 
1426         if 'playlist' not in info_dict
: 
1427             # It isn't part of a playlist 
1428             info_dict
['playlist'] = None 
1429             info_dict
['playlist_index'] = None 
1431         thumbnails 
= info_dict
.get('thumbnails') 
1432         if thumbnails 
is None: 
1433             thumbnail 
= info_dict
.get('thumbnail') 
1435                 info_dict
['thumbnails'] = thumbnails 
= [{'url': thumbnail
}] 
1437             thumbnails
.sort(key
=lambda t
: ( 
1438                 t
.get('preference') if t
.get('preference') is not None else -1, 
1439                 t
.get('width') if t
.get('width') is not None else -1, 
1440                 t
.get('height') if t
.get('height') is not None else -1, 
1441                 t
.get('id') if t
.get('id') is not None else '', t
.get('url'))) 
1442             for i
, t 
in enumerate(thumbnails
): 
1443                 t
['url'] = sanitize_url(t
['url']) 
1444                 if t
.get('width') and t
.get('height'): 
1445                     t
['resolution'] = '%dx%d' % (t
['width'], t
['height']) 
1446                 if t
.get('id') is None: 
1449         if self
.params
.get('list_thumbnails'): 
1450             self
.list_thumbnails(info_dict
) 
1453         thumbnail 
= info_dict
.get('thumbnail') 
1455             info_dict
['thumbnail'] = sanitize_url(thumbnail
) 
1457             info_dict
['thumbnail'] = thumbnails
[-1]['url'] 
1459         if 'display_id' not in info_dict 
and 'id' in info_dict
: 
1460             info_dict
['display_id'] = info_dict
['id'] 
1462         if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None: 
1463             # Working around out-of-range timestamp values (e.g. negative ones on Windows, 
1464             # see http://bugs.python.org/issue1646728) 
1466                 upload_date 
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp']) 
1467                 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d') 
1468             except (ValueError, OverflowError, OSError): 
1471         # Auto generate title fields corresponding to the *_number fields when missing 
1472         # in order to always have clean titles. This is very common for TV series. 
1473         for field 
in ('chapter', 'season', 'episode'): 
1474             if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
): 
1475                 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
]) 
1477         subtitles 
= info_dict
.get('subtitles') 
1479             for _
, subtitle 
in subtitles
.items(): 
1480                 for subtitle_format 
in subtitle
: 
1481                     if subtitle_format
.get('url'): 
1482                         subtitle_format
['url'] = sanitize_url(subtitle_format
['url']) 
1483                     if subtitle_format
.get('ext') is None: 
1484                         subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower() 
1486         if self
.params
.get('listsubtitles', False): 
1487             if 'automatic_captions' in info_dict
: 
1488                 self
.list_subtitles(info_dict
['id'], info_dict
.get('automatic_captions'), 'automatic captions') 
1489             self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles') 
1491         info_dict
['requested_subtitles'] = self
.process_subtitles( 
1492             info_dict
['id'], subtitles
, 
1493             info_dict
.get('automatic_captions')) 
1495         # We now pick which formats have to be downloaded 
1496         if info_dict
.get('formats') is None: 
1497             # There's only one format available 
1498             formats 
= [info_dict
] 
1500             formats 
= info_dict
['formats'] 
1503             raise ExtractorError('No video formats found!') 
1505         def is_wellformed(f
): 
1508                 self
.report_warning( 
1509                     '"url" field is missing or empty - skipping format, ' 
1510                     'there is an error in extractor') 
1512             if isinstance(url
, bytes): 
1513                 sanitize_string_field(f
, 'url') 
1516         # Filter out malformed formats for better extraction robustness 
1517         formats 
= list(filter(is_wellformed
, formats
)) 
1521         # We check that all the formats have the format and format_id fields 
1522         for i
, format 
in enumerate(formats
): 
1523             sanitize_string_field(format
, 'format_id') 
1524             sanitize_numeric_fields(format
) 
1525             format
['url'] = sanitize_url(format
['url']) 
1526             if not format
.get('format_id'): 
1527                 format
['format_id'] = compat_str(i
) 
1529                 # Sanitize format_id from characters used in format selector expression 
1530                 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id']) 
1531             format_id 
= format
['format_id'] 
1532             if format_id 
not in formats_dict
: 
1533                 formats_dict
[format_id
] = [] 
1534             formats_dict
[format_id
].append(format
) 
1536         # Make sure all formats have unique format_id 
1537         for format_id
, ambiguous_formats 
in formats_dict
.items(): 
1538             if len(ambiguous_formats
) > 1: 
1539                 for i
, format 
in enumerate(ambiguous_formats
): 
1540                     format
['format_id'] = '%s-%d' % (format_id
, i
) 
1542         for i
, format 
in enumerate(formats
): 
1543             if format
.get('format') is None: 
1544                 format
['format'] = '{id} - {res}{note}'.format( 
1545                     id=format
['format_id'], 
1546                     res
=self
.format_resolution(format
), 
1547                     note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '', 
1549             # Automatically determine file extension if missing 
1550             if format
.get('ext') is None: 
1551                 format
['ext'] = determine_ext(format
['url']).lower() 
1552             # Automatically determine protocol if missing (useful for format 
1553             # selection purposes) 
1554             if format
.get('protocol') is None: 
1555                 format
['protocol'] = determine_protocol(format
) 
1556             # Add HTTP headers, so that external programs can use them from the 
1558             full_format_info 
= info_dict
.copy() 
1559             full_format_info
.update(format
) 
1560             format
['http_headers'] = self
._calc
_headers
(full_format_info
) 
1561         # Remove private housekeeping stuff 
1562         if '__x_forwarded_for_ip' in info_dict
: 
1563             del info_dict
['__x_forwarded_for_ip'] 
1565         # TODO Central sorting goes here 
1567         if formats
[0] is not info_dict
: 
1568             # only set the 'formats' fields if the original info_dict list them 
1569             # otherwise we end up with a circular reference, the first (and unique) 
1570             # element in the 'formats' field in info_dict is info_dict itself, 
1571             # which can't be exported to json 
1572             info_dict
['formats'] = formats
 
1573         if self
.params
.get('listformats'): 
1574             self
.list_formats(info_dict
) 
1577         req_format 
= self
.params
.get('format') 
1578         if req_format 
is None: 
1579             req_format 
= self
._default
_format
_spec
(info_dict
, download
=download
) 
1580             if self
.params
.get('verbose'): 
1581                 self
.to_stdout('[debug] Default format spec: %s' % req_format
) 
1583         format_selector 
= self
.build_format_selector(req_format
) 
1585         # While in format selection we may need to have an access to the original 
1586         # format set in order to calculate some metrics or do some processing. 
1587         # For now we need to be able to guess whether original formats provided 
1588         # by extractor are incomplete or not (i.e. whether extractor provides only 
1589         # video-only or audio-only formats) for proper formats selection for 
1590         # extractors with such incomplete formats (see 
1591         # https://github.com/rg3/youtube-dl/pull/5556). 
1592         # Since formats may be filtered during format selection and may not match 
1593         # the original formats the results may be incorrect. Thus original formats 
1594         # or pre-calculated metrics should be passed to format selection routines 
1596         # We will pass a context object containing all necessary additional data 
1597         # instead of just formats. 
1598         # This fixes incorrect format selection issue (see 
1599         # https://github.com/rg3/youtube-dl/issues/10083). 
1600         incomplete_formats 
= ( 
1601             # All formats are video-only or 
1602             all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f 
in formats
) or 
1603             # all formats are audio-only 
1604             all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f 
in formats
)) 
1608             'incomplete_formats': incomplete_formats
, 
1611         formats_to_download 
= list(format_selector(ctx
)) 
1612         if not formats_to_download
: 
1613             raise ExtractorError('requested format not available', 
1617             if len(formats_to_download
) > 1: 
1618                 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
))) 
1619             for format 
in formats_to_download
: 
1620                 new_info 
= dict(info_dict
) 
1621                 new_info
.update(format
) 
1622                 self
.process_info(new_info
) 
1623         # We update the info dict with the best quality format (backwards compatibility) 
1624         info_dict
.update(formats_to_download
[-1]) 
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Returns a dict mapping language code -> chosen subtitle format dict,
        or None when subtitles were not requested or none are available.
        """
        available_subs = {}
        # Pool regular subtitles and/or automatic captions depending on which
        # the user asked for; real subtitles take precedence on a language clash.
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        if self.params.get('allsubtitles', False):
            requested_langs = available_subs.keys()
        else:
            if self.params.get('subtitleslangs', False):
                requested_langs = self.params.get('subtitleslangs')
            elif 'en' in available_subs:
                # Default to English when it is available
                requested_langs = ['en']
            else:
                # Otherwise fall back to an arbitrary available language
                requested_langs = [list(available_subs.keys())[0]]

        # e.g. 'srt/ass/best' — a '/'-separated preference list of extensions
        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []

        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                # No preferred extension matched: take the best (last) format
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printings, writes the side files (description,
        annotations, subtitles, info JSON, thumbnails), performs the actual
        download (merging multi-format requests), applies ffmpeg fixups and
        postprocessors, and finally records the download in the archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        # Keep filenames manageable: truncate overly long titles with an ellipsis
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings (--get-title, --get-id, --get-url, ...)
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        def ensure_dir_exists(path):
            # Create path's parent directory; report (not raise) on failure.
            try:
                dn = os.path.dirname(path)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False

        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string: nothing to write
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    if sub_info.get('data') is not None:
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/rg3/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        try:
                            sub_data = ie._request_webpage(
                                sub_info['url'], info_dict['id'], note=False).read()
                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                subfile.write(sub_data)
                        except (ExtractorError, IOError, OSError, ValueError) as err:
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Pick and run the right FileDownloader for this format
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # Decide whether a (video, audio) pair can be merged
                        # without falling back to an mkv container.
                        video, audio = formats
                        # Check extension
                        # NOTE(review): video_ext is read from the audio dict and
                        # vice versa; harmless today because the membership test
                        # below is symmetric, but the names look swapped — confirm.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                # NOTE(review): ('webm') is a plain string, not a
                                # 1-tuple, so this entry does substring matching —
                                # verify against upstream before relying on it.
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        # Download every requested format to its own temp file;
                        # the merger postprocessor combines them afterwards.
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('requested_formats') is None and
                        info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('protocol') == 'm3u8_native' or
                        info_dict.get('protocol') == 'm3u8' and
                        self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformed AAC bitstream detected. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        # A fixed output template with several URLs would make every download
        # overwrite the same file, so refuse that combination upfront.
        if (len(url_list) > 1 and
                outtmpl != '-' and
                '%' not in outtmpl and
                self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            try:
                # It also downloads the videos
                res = self.extract_info(
                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                raise
            else:
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using metadata from a previously written .info.json file.

        Falls back to re-extracting from the original webpage URL when
        downloading from the saved metadata fails.
        """
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.filter_requested_info(json.loads('\n'.join(f)))
        try:
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            else:
                raise
        return self._download_retcode
2018     def filter_requested_info(info_dict
): 
2020             (k
, v
) for k
, v 
in info_dict
.items() 
2021             if k 
not in ['requested_formats', 'requested_subtitles']) 
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        pps_chain = []
        # Per-download postprocessors (e.g. the format merger) run before the
        # globally configured ones in self._pps.
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            files_to_delete = []
            try:
                # Each postprocessor may replace info and name leftovers to delete
                files_to_delete, info = pp.run(info)
            except PostProcessingError as e:
                self.report_error(e.msg)
            if files_to_delete and not self.params.get('keepvideo', False):
                for old_filename in files_to_delete:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    try:
                        os.remove(encodeFilename(old_filename))
                    except (IOError, OSError):
                        # Best effort: keep going even if cleanup fails
                        self.report_warning('Unable to remove downloaded original file')
2045     def _make_archive_id(self
, info_dict
): 
2046         # Future-proof against any change in case 
2047         # and backwards compatibility with prior versions 
2048         extractor 
= info_dict
.get('extractor_key') 
2049         if extractor 
is None: 
2050             if 'id' in info_dict
: 
2051                 extractor 
= info_dict
.get('ie_key')  # key in a playlist 
2052         if extractor 
is None: 
2053             return None  # Incomplete video information 
2054         return extractor
.lower() + ' ' + info_dict
['id'] 
    def in_download_archive(self, info_dict):
        """Return True if this video is already recorded in the download archive."""
        fn = self.params.get('download_archive')
        if fn is None:
            return False

        vid_id = self._make_archive_id(info_dict)
        if vid_id is None:
            return False  # Incomplete video information

        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
                        return True
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet
            if ioe.errno != errno.ENOENT:
                raise
        return False
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the download archive file."""
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        # Callers only reach this point with complete video information
        assert vid_id
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
2085     def format_resolution(format
, default
='unknown'): 
2086         if format
.get('vcodec') == 'none': 
2088         if format
.get('resolution') is not None: 
2089             return format
['resolution'] 
2090         if format
.get('height') is not None: 
2091             if format
.get('width') is not None: 
2092                 res 
= '%sx%s' % (format
['width'], format
['height']) 
2094                 res 
= '%sp' % format
['height'] 
2095         elif format
.get('width') is not None: 
2096             res 
= '%dx?' % format
['width'] 
    def _format_note(self, fdict):
        """Build the human-readable 'note' column for one format dict.

        Pieces are appended in a fixed order (container, video codec/bitrate,
        fps, audio codec/bitrate, sample rate, filesize), joined with ', '.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec name to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2157     def list_formats(self
, info_dict
): 
2158         formats 
= info_dict
.get('formats', [info_dict
]) 
2160             [f
['format_id'], f
['ext'], self
.format_resolution(f
), self
._format
_note
(f
)] 
2162             if f
.get('preference') is None or f
['preference'] >= -1000] 
2163         if len(formats
) > 1: 
2164             table
[-1][-1] += (' ' if table
[-1][-1] else '') + '(best)' 
2166         header_line 
= ['format code', 'extension', 'resolution', 'note'] 
2168             '[info] Available formats for %s:\n%s' % 
2169             (info_dict
['id'], render_table(header_line
, table
))) 
2171     def list_thumbnails(self
, info_dict
): 
2172         thumbnails 
= info_dict
.get('thumbnails') 
2174             self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id']) 
2178             '[info] Thumbnails for %s:' % info_dict
['id']) 
2179         self
.to_screen(render_table( 
2180             ['ID', 'width', 'height', 'URL'], 
2181             [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t 
in thumbnails
])) 
2183     def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'): 
2185             self
.to_screen('%s has no %s' % (video_id
, name
)) 
2188             'Available %s for %s:' % (name
, video_id
)) 
2189         self
.to_screen(render_table( 
2190             ['Language', 'formats'], 
2191             [[lang
, ', '.join(f
['ext'] for f 
in reversed(formats
))] 
2192                 for lang
, formats 
in subtitles
.items()])) 
2194     def urlopen(self
, req
): 
2195         """ Start an HTTP download """ 
2196         if isinstance(req
, compat_basestring
): 
2197             req 
= sanitized_Request(req
) 
2198         return self
._opener
.open(req
, timeout
=self
._socket
_timeout
) 
2200     def print_debug_header(self
): 
2201         if not self
.params
.get('verbose'): 
2204         if type('') is not compat_str
: 
2205             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) 
2206             self
.report_warning( 
2207                 'Your Python is broken! Update to a newer and supported version') 
2209         stdout_encoding 
= getattr( 
2210             sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__) 
2212             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( 
2213                 locale
.getpreferredencoding(), 
2214                 sys
.getfilesystemencoding(), 
2216                 self
.get_encoding())) 
2217         write_string(encoding_str
, encoding
=None) 
2219         self
._write
_string
('[debug] youtube-dl version ' + __version__ 
+ '\n') 
2221             self
._write
_string
('[debug] Lazy loading extractors enabled' + '\n') 
2223             sp 
= subprocess
.Popen( 
2224                 ['git', 'rev-parse', '--short', 'HEAD'], 
2225                 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, 
2226                 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
))) 
2227             out
, err 
= sp
.communicate() 
2228             out 
= out
.decode().strip() 
2229             if re
.match('[0-9a-f]+', out
): 
2230                 self
._write
_string
('[debug] Git HEAD: ' + out 
+ '\n') 
2237         def python_implementation(): 
2238             impl_name 
= platform
.python_implementation() 
2239             if impl_name 
== 'PyPy' and hasattr(sys
, 'pypy_version_info'): 
2240                 return impl_name 
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3] 
2243         self
._write
_string
('[debug] Python version %s (%s) - %s\n' % ( 
2244             platform
.python_version(), python_implementation(), 
2247         exe_versions 
= FFmpegPostProcessor
.get_versions(self
) 
2248         exe_versions
['rtmpdump'] = rtmpdump_version() 
2249         exe_versions
['phantomjs'] = PhantomJSwrapper
._version
() 
2250         exe_str 
= ', '.join( 
2252             for exe
, v 
in sorted(exe_versions
.items()) 
2257         self
._write
_string
('[debug] exe versions: %s\n' % exe_str
) 
2260         for handler 
in self
._opener
.handlers
: 
2261             if hasattr(handler
, 'proxies'): 
2262                 proxy_map
.update(handler
.proxies
) 
2263         self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n') 
2265         if self
.params
.get('call_home', False): 
2266             ipaddr 
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8') 
2267             self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
) 
2268             latest_version 
= self
.urlopen( 
2269                 'https://yt-dl.org/latest/version').read().decode('utf-8') 
2270             if version_tuple(latest_version
) > version_tuple(__version__
): 
2271                 self
.report_warning( 
2272                     'You are using an outdated version (newest version: %s)! ' 
2273                     'See https://yt-dl.org/update if you need help updating.' % 
2276     def _setup_opener(self
): 
2277         timeout_val 
= self
.params
.get('socket_timeout') 
2278         self
._socket
_timeout 
= 600 if timeout_val 
is None else float(timeout_val
) 
2280         opts_cookiefile 
= self
.params
.get('cookiefile') 
2281         opts_proxy 
= self
.params
.get('proxy') 
2283         if opts_cookiefile 
is None: 
2284             self
.cookiejar 
= compat_cookiejar
.CookieJar() 
2286             opts_cookiefile 
= expand_path(opts_cookiefile
) 
2287             self
.cookiejar 
= compat_cookiejar
.MozillaCookieJar( 
2289             if os
.access(opts_cookiefile
, os
.R_OK
): 
2290                 self
.cookiejar
.load() 
2292         cookie_processor 
= YoutubeDLCookieProcessor(self
.cookiejar
) 
2293         if opts_proxy 
is not None: 
2294             if opts_proxy 
== '': 
2297                 proxies 
= {'http': opts_proxy
, 'https': opts_proxy
} 
2299             proxies 
= compat_urllib_request
.getproxies() 
2300             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) 
2301             if 'http' in proxies 
and 'https' not in proxies
: 
2302                 proxies
['https'] = proxies
['http'] 
2303         proxy_handler 
= PerRequestProxyHandler(proxies
) 
2305         debuglevel 
= 1 if self
.params
.get('debug_printtraffic') else 0 
2306         https_handler 
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
) 
2307         ydlh 
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
) 
2308         data_handler 
= compat_urllib_request_DataHandler() 
2310         # When passing our own FileHandler instance, build_opener won't add the 
2311         # default FileHandler and allows us to disable the file protocol, which 
2312         # can be used for malicious purposes (see 
2313         # https://github.com/rg3/youtube-dl/issues/8227) 
2314         file_handler 
= compat_urllib_request
.FileHandler() 
2316         def file_open(*args
, **kwargs
): 
2317             raise compat_urllib_error
.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') 
2318         file_handler
.file_open 
= file_open
 
2320         opener 
= compat_urllib_request
.build_opener( 
2321             proxy_handler
, https_handler
, cookie_processor
, ydlh
, data_handler
, file_handler
) 
2323         # Delete the default user-agent header, which would otherwise apply in 
2324         # cases where our custom HTTP handler doesn't come into play 
2325         # (See https://github.com/rg3/youtube-dl/issues/1309 for details) 
2326         opener
.addheaders 
= [] 
2327         self
._opener 
= opener
 
2329     def encode(self
, s
): 
2330         if isinstance(s
, bytes): 
2331             return s  
# Already encoded 
2334             return s
.encode(self
.get_encoding()) 
2335         except UnicodeEncodeError as err
: 
2336             err
.reason 
= err
.reason 
+ '. Check your system encoding configuration or use the --encoding option.' 
2339     def get_encoding(self
): 
2340         encoding 
= self
.params
.get('encoding') 
2341         if encoding 
is None: 
2342             encoding 
= preferredencoding() 
2345     def _write_thumbnails(self
, info_dict
, filename
): 
2346         if self
.params
.get('writethumbnail', False): 
2347             thumbnails 
= info_dict
.get('thumbnails') 
2349                 thumbnails 
= [thumbnails
[-1]] 
2350         elif self
.params
.get('write_all_thumbnails', False): 
2351             thumbnails 
= info_dict
.get('thumbnails') 
2356             # No thumbnails present, so return immediately 
2359         for t 
in thumbnails
: 
2360             thumb_ext 
= determine_ext(t
['url'], 'jpg') 
2361             suffix 
= '_%s' % t
['id'] if len(thumbnails
) > 1 else '' 
2362             thumb_display_id 
= '%s ' % t
['id'] if len(thumbnails
) > 1 else '' 
2363             t
['filename'] = thumb_filename 
= os
.path
.splitext(filename
)[0] + suffix 
+ '.' + thumb_ext
 
2365             if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(thumb_filename
)): 
2366                 self
.to_screen('[%s] %s: Thumbnail %sis already present' % 
2367                                (info_dict
['extractor'], info_dict
['id'], thumb_display_id
)) 
2369                 self
.to_screen('[%s] %s: Downloading thumbnail %s...' % 
2370                                (info_dict
['extractor'], info_dict
['id'], thumb_display_id
)) 
2372                     uf 
= self
.urlopen(t
['url']) 
2373                     with open(encodeFilename(thumb_filename
), 'wb') as thumbf
: 
2374                         shutil
.copyfileobj(uf
, thumbf
) 
2375                     self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' % 
2376                                    (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
)) 
2377                 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
2378                     self
.report_warning('Unable to download thumbnail "%s": %s' % 
2379                                         (t
['url'], error_to_compat_str(err
)))