2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
, unicode_literals
33 compat_get_terminal_size
,
37 compat_tokenize_tokenize
,
39 compat_urllib_request
,
40 compat_urllib_request_DataHandler
,
58 PerRequestProxyHandler
,
68 UnavailableVideoError
,
73 YoutubeDLCookieProcessor
,
80 from .cache
import Cache
81 from .extractor
import get_info_extractor
, gen_extractors
82 from .downloader
import get_suitable_downloader
83 from .downloader
.rtmp
import rtmpdump_version
84 from .postprocessor
import (
86 FFmpegFixupStretchedPP
,
91 from .version
import __version__
94 class YoutubeDL(object):
97 YoutubeDL objects are the ones responsible for downloading the
98 actual video file and writing it to disk if the user has requested
99 it, among some other tasks. In most cases there should be one per
100 program. As, given a video URL, the downloader doesn't know how to
101 extract all the needed information, task that InfoExtractors do, it
102 has to pass the URL to one of them.
104 For this, YoutubeDL objects have a method that allows
105 InfoExtractors to be registered in a given order. When it is passed
106 a URL, the YoutubeDL object hands it to the first InfoExtractor it
107 finds that reports being able to handle it. The InfoExtractor extracts
108 all the information about the video or videos the URL refers to, and
109 YoutubeDL processes the extracted information, possibly using a File
110 Downloader to download the video.
112 YoutubeDL objects accept a lot of parameters. In order not to saturate
113 the object constructor with arguments, it receives a dictionary of
114 options instead. These options are available through the params
115 attribute for the InfoExtractors to use. The YoutubeDL also
116 registers itself as the downloader in charge for the InfoExtractors
117 that are added to it, so this is a "mutual registration".
121 username: Username for authentication purposes.
122 password: Password for authentication purposes.
123 videopassword: Password for accessing a video.
124 usenetrc: Use netrc for authentication instead.
125 verbose: Print additional info to stdout.
126 quiet: Do not print messages to stdout.
127 no_warnings: Do not print out anything for warnings.
128 forceurl: Force printing final URL.
129 forcetitle: Force printing title.
130 forceid: Force printing ID.
131 forcethumbnail: Force printing thumbnail URL.
132 forcedescription: Force printing description.
133 forcefilename: Force printing final filename.
134 forceduration: Force printing duration.
135 forcejson: Force printing info_dict as JSON.
136 dump_single_json: Force printing the info_dict of the whole playlist
137 (or video) as a single JSON line.
138 simulate: Do not download the video files.
139 format: Video format code. See options.py for more information.
140 outtmpl: Template for output names.
141 restrictfilenames: Do not allow "&" and spaces in file names
142 ignoreerrors: Do not stop on download errors.
143 force_generic_extractor: Force downloader to use the generic extractor
144 nooverwrites: Prevent overwriting files.
145 playliststart: Playlist item to start at.
146 playlistend: Playlist item to end at.
147 playlist_items: Specific indices of playlist to download.
148 playlistreverse: Download playlist items in reverse order.
149 matchtitle: Download only matching titles.
150 rejecttitle: Reject downloads for matching titles.
151 logger: Log messages to a logging.Logger instance.
152 logtostderr: Log messages to stderr instead of stdout.
153 writedescription: Write the video description to a .description file
154 writeinfojson: Write the video metadata to a .info.json file
155 writeannotations: Write the video annotations to a .annotations.xml file
156 writethumbnail: Write the thumbnail image to a file
157 write_all_thumbnails: Write all thumbnail formats to files
158 writesubtitles: Write the video subtitles to a file
159 writeautomaticsub: Write the automatic subtitles to a file
160 allsubtitles: Downloads all the subtitles of the video
161 (requires writesubtitles or writeautomaticsub)
162 listsubtitles: Lists all available subtitles for the video
163 subtitlesformat: The format code for subtitles
164 subtitleslangs: List of languages of the subtitles to download
165 keepvideo: Keep the video file after post-processing
166 daterange: A DateRange object, download only if the upload_date is in the range.
167 skip_download: Skip the actual download of the video file
168 cachedir: Location of the cache files in the filesystem.
169 False to disable filesystem cache.
170 noplaylist: Download single video instead of a playlist if in doubt.
171 age_limit: An integer representing the user's age in years.
172 Unsuitable videos for the given age are skipped.
173 min_views: An integer representing the minimum view count the video
174 must have in order to not be skipped.
175 Videos without view count information are always
176 downloaded. None for no limit.
177 max_views: An integer representing the maximum view count.
178 Videos that are more popular than that are not
180 Videos without view count information are always
181 downloaded. None for no limit.
182 download_archive: File name of a file where all downloads are recorded.
183 Videos already present in the file are not downloaded
185 cookiefile: File name where cookies should be read from and dumped to.
186 nocheckcertificate:Do not verify SSL certificates
187 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
188 At the moment, this is only supported by YouTube.
189 proxy: URL of the proxy server to use
190 cn_verification_proxy: URL of the proxy to use for IP address verification
191 on Chinese sites. (Experimental)
192 socket_timeout: Time to wait for unresponsive hosts, in seconds
193 bidi_workaround: Work around buggy terminals without bidirectional text
194 support, using fribidi
195 debug_printtraffic:Print out sent and received HTTP traffic
196 include_ads: Download ads as well
197 default_search: Prepend this string if an input url is not valid.
198 'auto' for elaborate guessing
199 encoding: Use this encoding instead of the system-specified.
200 extract_flat: Do not resolve URLs, return the immediate result.
201 Pass in 'in_playlist' to only show this behavior for
203 postprocessors: A list of dictionaries, each with an entry
204 * key: The name of the postprocessor. See
205 youtube_dl/postprocessor/__init__.py for a list.
206 as well as any further keyword arguments for the
208 progress_hooks: A list of functions that get called on download
209 progress, with a dictionary with the entries
210 * status: One of "downloading", "error", or "finished".
211 Check this first and ignore unknown values.
213 If status is one of "downloading", or "finished", the
214 following properties may also be present:
215 * filename: The final filename (always present)
216 * tmpfilename: The filename we're currently writing to
217 * downloaded_bytes: Bytes on disk
218 * total_bytes: Size of the whole file, None if unknown
219 * total_bytes_estimate: Guess of the eventual file size,
221 * elapsed: The number of seconds since download started.
222 * eta: The estimated time in seconds, None if unknown
223 * speed: The download speed in bytes/second, None if
225 * fragment_index: The counter of the currently
226 downloaded video fragment.
227 * fragment_count: The number of fragments (= individual
228 files that will be merged)
230 Progress hooks are guaranteed to be called at least once
231 (with status "finished") if the download is successful.
232 merge_output_format: Extension to use when merging formats.
233 fixup: Automatically correct known faults of the file.
235 - "never": do nothing
236 - "warn": only emit a warning
237 - "detect_or_warn": check whether we can do anything
238 about it, warn otherwise (default)
239 source_address: (Experimental) Client-side IP address to bind to.
240 call_home: Boolean, true iff we are allowed to contact the
241 youtube-dl servers for debugging.
242 sleep_interval: Number of seconds to sleep before each download.
243 listformats: Print an overview of available video formats and exit.
244 list_thumbnails: Print a table of all thumbnails and exit.
245 match_filter: A function that gets called with the info_dict of
247 If it returns a message, the video is ignored.
248 If it returns None, the video is downloaded.
249 match_filter_func in utils.py is one example for this.
250 no_color: Do not emit color codes in output.
252 The following options determine which downloader is picked:
253 external_downloader: Executable of the external downloader to call.
254 None or unset for standard (built-in) downloader.
255 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
257 The following parameters are not used by YoutubeDL itself, they are used by
258 the downloader (see youtube_dl/downloader/common.py):
259 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
260 noresizebuffer, retries, continuedl, noprogress, consoletitle,
261 xattr_set_filesize, external_downloader_args.
263 The following options are used by the post processors:
264 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
265 otherwise prefer avconv.
266 postprocessor_args: A list of additional command-line arguments for the
273 _download_retcode
= None
274 _num_downloads
= None
277 def __init__(self
, params
=None, auto_init
=True):
278 """Create a YoutubeDL object with the given options."""
282 self
._ies
_instances
= {}
284 self
._progress
_hooks
= []
285 self
._download
_retcode
= 0
286 self
._num
_downloads
= 0
287 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
288 self
._err
_file
= sys
.stderr
291 'nocheckcertificate': False,
293 self
.params
.update(params
)
294 self
.cache
= Cache(self
)
296 if params
.get('bidi_workaround', False):
299 master
, slave
= pty
.openpty()
300 width
= compat_get_terminal_size().columns
304 width_args
= ['-w', str(width
)]
306 stdin
=subprocess
.PIPE
,
308 stderr
=self
._err
_file
)
310 self
._output
_process
= subprocess
.Popen(
311 ['bidiv'] + width_args
, **sp_kwargs
314 self
._output
_process
= subprocess
.Popen(
315 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
316 self
._output
_channel
= os
.fdopen(master
, 'rb')
317 except OSError as ose
:
319 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
323 if (sys
.version_info
>= (3,) and sys
.platform
!= 'win32' and
324 sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
325 not params
.get('restrictfilenames', False)):
326 # On Python 3, the Unicode filesystem API will throw errors (#1474)
328 'Assuming --restrict-filenames since file system encoding '
329 'cannot encode all characters. '
330 'Set the LC_ALL environment variable to fix this.')
331 self
.params
['restrictfilenames'] = True
333 if isinstance(params
.get('outtmpl'), bytes):
335 'Parameter outtmpl is bytes, but should be a unicode string. '
336 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
341 self
.print_debug_header()
342 self
.add_default_info_extractors()
344 for pp_def_raw
in self
.params
.get('postprocessors', []):
345 pp_class
= get_postprocessor(pp_def_raw
['key'])
346 pp_def
= dict(pp_def_raw
)
348 pp
= pp_class(self
, **compat_kwargs(pp_def
))
349 self
.add_post_processor(pp
)
351 for ph
in self
.params
.get('progress_hooks', []):
352 self
.add_progress_hook(ph
)
354 def warn_if_short_id(self
, argv
):
355 # short YouTube ID starting with dash?
357 i
for i
, a
in enumerate(argv
)
358 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
362 [a
for i
, a
in enumerate(argv
) if i
not in idxs
] +
363 ['--'] + [argv
[i
] for i
in idxs
]
366 'Long argument string detected. '
367 'Use -- to separate parameters and URLs, like this:\n%s\n' %
368 args_to_str(correct_argv
))
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is appended to ``self._ies``, indexed in
    ``self._ies_instances`` under ``ie.ie_key()`` (replacing any instance
    previously stored under the same key), and informed through
    ``ie.set_downloader`` that this object is its downloader.
    """
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key.

    The per-key cache ``self._ies_instances`` is consulted first; if it
    holds no instance for ie_key, a new one is created from the class
    returned by the module-level get_info_extractor(), registered through
    add_info_extractor(), and returned.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # Inside the class body this name resolves to the module-level
        # factory imported from .extractor, not to this method.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list

    Each extractor is registered through add_info_extractor(), in the
    order gen_extractors() yields them.
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
395 def add_post_processor(self
, pp
):
396 """Add a PostProcessor object to the end of the chain."""
398 pp
.set_downloader(self
)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)

    ph is appended to ``self._progress_hooks``; hooks are kept in the
    order in which they were added.
    """
    self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    """Run message through the external bidi helper process, if one is set up.

    When ``self._output_channel`` does not exist the message is returned
    unchanged. Otherwise the message is written (UTF-8 encoded, newline
    terminated) to the stdin of ``self._output_process`` and the same
    number of lines is read back from ``self._output_channel``; the
    trailing newline of the reply is stripped before returning.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    # One line is read back per line sent, so count them before writing.
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the newline that was appended only to terminate the request.
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    Thin wrapper around to_stdout() with check_quiet=True; returns
    whatever to_stdout() returns.
    """
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' param.

    The encoding passed along is ``self.params.get('encoding')``, i.e.
    None when the option is not set.
    """
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout.

    If a 'logger' param is set, the message goes to its debug() method
    instead and nothing is written. Otherwise the message is written to
    ``self._screen_file`` unless check_quiet is true and the 'quiet'
    param is set. A newline is appended unless skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        # A conditional expression accepts any truthy skip_eol, unlike
        # indexing a two-element list with it.
        terminator = '' if skip_eol else '\n'
        output = message + terminator

        self._write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr.

    If a 'logger' param is set, the message goes to its error() method
    instead; otherwise it is written, newline terminated, to
    ``self._err_file``.
    """
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the console window title to message.

    Does nothing unless the 'consoletitle' param is set. On Windows
    (os.name == 'nt') with a console window the title is set through
    kernel32; otherwise, when TERM is present in the environment, an
    escape sequence carrying the title is written to
    ``self._screen_file``.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Ask the terminal to remember its current title.

    Does nothing unless the 'consoletitle' param is set and TERM is
    present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Ask the terminal to restore the title it previously remembered.

    Does nothing unless the 'consoletitle' param is set and TERM is
    present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
470 self
.save_console_title()
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is saved only when a 'cookiefile' param is set. The
    exception details passed in *args are ignored and nothing is
    returned, so an in-flight exception is not suppressed.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information. It is only
    printed when the 'verbose' param is set; when it is not given, a
    traceback is built from the exception being handled, or from the
    current call stack if no exception is active.

    Raises DownloadError unless the 'ignoreerrors' param is set, in
    which case ``self._download_retcode`` is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)

    # Take one snapshot of the exception being handled (if any) rather
    # than calling sys.exc_info() again for every check.
    exc_info = sys.exc_info()
    # The active exception may itself carry the exc_info of an error it
    # wraps; that inner error is the more informative one.
    wrapped = getattr(exc_info[1], 'exc_info', None)
    has_wrapped = bool(exc_info[0] and wrapped and wrapped[0])

    if self.params.get('verbose'):
        if tb is None:
            if exc_info[0]:  # if .trouble has been called from an except block
                tb = ''
                if has_wrapped:
                    tb += ''.join(traceback.format_exception(*wrapped))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        raise DownloadError(message, wrapped if has_wrapped else exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    If a 'logger' param is set, the unprefixed message goes to its
    warning() method instead. Nothing is printed when the 'no_warnings'
    param is set. Coloring is skipped when the 'no_color' param is set
    or on Windows (os.name == 'nt').
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    warning_message = '%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.

    Coloring is skipped when the 'no_color' param is set or on Windows
    (os.name == 'nt'). tb is handed to trouble() unchanged.
    '''
    if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
    else:
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    If printing the message that includes file_name raises
    UnicodeEncodeError, a generic message without the name is printed
    instead.
    """
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
545 def prepare_filename(self
, info_dict
):
546 """Generate the output filename."""
548 template_dict
= dict(info_dict
)
550 template_dict
['epoch'] = int(time
.time())
551 autonumber_size
= self
.params
.get('autonumber_size')
552 if autonumber_size
is None:
554 autonumber_templ
= '%0' + str(autonumber_size
) + 'd'
555 template_dict
['autonumber'] = autonumber_templ
% self
._num
_downloads
556 if template_dict
.get('playlist_index') is not None:
557 template_dict
['playlist_index'] = '%0*d' % (len(str(template_dict
['n_entries'])), template_dict
['playlist_index'])
558 if template_dict
.get('resolution') is None:
559 if template_dict
.get('width') and template_dict
.get('height'):
560 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
561 elif template_dict
.get('height'):
562 template_dict
['resolution'] = '%sp' % template_dict
['height']
563 elif template_dict
.get('width'):
564 template_dict
['resolution'] = '?x%d' % template_dict
['width']
566 sanitize
= lambda k
, v
: sanitize_filename(
568 restricted
=self
.params
.get('restrictfilenames'),
570 template_dict
= dict((k
, sanitize(k
, v
))
571 for k
, v
in template_dict
.items()
573 template_dict
= collections
.defaultdict(lambda: 'NA', template_dict
)
575 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
576 tmpl
= compat_expanduser(outtmpl
)
577 filename
= tmpl
% template_dict
578 # Temporary fix for #4787
579 # 'Treat' all problem characters by passing filename through preferredencoding
580 # to workaround encoding issues with subprocess on python2 @ Windows
581 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
582 filename
= encodeFilename(filename
, True).decode(preferredencoding())
583 return sanitize_path(filename
)
584 except ValueError as err
:
585 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
588 def _match_entry(self
, info_dict
, incomplete
):
589 """ Returns None iff the file should be downloaded """
591 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
592 if 'title' in info_dict
:
593 # This can happen when we're just evaluating the playlist
594 title
= info_dict
['title']
595 matchtitle
= self
.params
.get('matchtitle', False)
597 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
598 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
599 rejecttitle
= self
.params
.get('rejecttitle', False)
601 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
602 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
603 date
= info_dict
.get('upload_date', None)
605 dateRange
= self
.params
.get('daterange', DateRange())
606 if date
not in dateRange
:
607 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
608 view_count
= info_dict
.get('view_count', None)
609 if view_count
is not None:
610 min_views
= self
.params
.get('min_views')
611 if min_views
is not None and view_count
< min_views
:
612 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
613 max_views
= self
.params
.get('max_views')
614 if max_views
is not None and view_count
> max_views
:
615 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
616 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
617 return 'Skipping "%s" because it is age restricted' % video_title
618 if self
.in_download_archive(info_dict
):
619 return '%s has already been recorded in archive' % video_title
622 match_filter
= self
.params
.get('match_filter')
623 if match_filter
is not None:
624 ret
= match_filter(info_dict
)
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing

    info_dict is modified in place; keys it already contains keep their
    current value, even when that value is None. Declared static because
    it takes no self yet is invoked as self.add_extra_info(...).
    '''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
636 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={},
637 process
=True, force_generic_extractor
=False):
639 Returns a list with a dictionary for each video we find.
640 If 'download', also downloads the videos.
641 extra_info is a dict containing the extra values to add to each result
644 if not ie_key
and force_generic_extractor
:
648 ies
= [self
.get_info_extractor(ie_key
)]
653 if not ie
.suitable(url
):
657 self
.report_warning('The program functionality for this site has been marked as broken, '
658 'and will probably not work.')
661 ie_result
= ie
.extract(url
)
662 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
664 if isinstance(ie_result
, list):
665 # Backwards compatibility: old IE result format
667 '_type': 'compat_list',
668 'entries': ie_result
,
670 self
.add_default_extra_info(ie_result
, ie
, url
)
672 return self
.process_ie_result(ie_result
, download
, extra_info
)
675 except ExtractorError
as de
: # An error we somewhat expected
676 self
.report_error(compat_str(de
), de
.format_traceback())
678 except MaxDownloadsReached
:
680 except Exception as e
:
681 if self
.params
.get('ignoreerrors', False):
682 self
.report_error(compat_str(e
), tb
=compat_str(traceback
.format_exc()))
687 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and page-URL fields of ie_result if missing.

    Uses add_extra_info(), so values already present in ie_result are
    left untouched.
    """
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        # Read back later as ie_result['webpage_url'] when results are
        # processed.
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
697 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
699 Take the result of the ie (may be modified) and resolve all unresolved
700 references (URLs, playlist items).
702 It will also download the videos if 'download'.
703 Returns the resolved ie_result.
706 result_type
= ie_result
.get('_type', 'video')
708 if result_type
in ('url', 'url_transparent'):
709 extract_flat
= self
.params
.get('extract_flat', False)
710 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
) or
711 extract_flat
is True):
712 if self
.params
.get('forcejson', False):
713 self
.to_stdout(json
.dumps(ie_result
))
716 if result_type
== 'video':
717 self
.add_extra_info(ie_result
, extra_info
)
718 return self
.process_video_result(ie_result
, download
=download
)
719 elif result_type
== 'url':
720 # We have to add extra_info to the results because it may be
721 # contained in a playlist
722 return self
.extract_info(ie_result
['url'],
724 ie_key
=ie_result
.get('ie_key'),
725 extra_info
=extra_info
)
726 elif result_type
== 'url_transparent':
727 # Use the information from the embedding page
728 info
= self
.extract_info(
729 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
730 extra_info
=extra_info
, download
=False, process
=False)
732 force_properties
= dict(
733 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
734 for f
in ('_type', 'url'):
735 if f
in force_properties
:
736 del force_properties
[f
]
737 new_result
= info
.copy()
738 new_result
.update(force_properties
)
740 assert new_result
.get('_type') != 'url_transparent'
742 return self
.process_ie_result(
743 new_result
, download
=download
, extra_info
=extra_info
)
744 elif result_type
== 'playlist' or result_type
== 'multi_video':
745 # We process each entry in the playlist
746 playlist
= ie_result
.get('title', None) or ie_result
.get('id', None)
747 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
749 playlist_results
= []
751 playliststart
= self
.params
.get('playliststart', 1) - 1
752 playlistend
= self
.params
.get('playlistend', None)
753 # For backwards compatibility, interpret -1 as whole list
754 if playlistend
== -1:
757 playlistitems_str
= self
.params
.get('playlist_items', None)
759 if playlistitems_str
is not None:
760 def iter_playlistitems(format
):
761 for string_segment
in format
.split(','):
762 if '-' in string_segment
:
763 start
, end
= string_segment
.split('-')
764 for item
in range(int(start
), int(end
) + 1):
767 yield int(string_segment
)
768 playlistitems
= iter_playlistitems(playlistitems_str
)
770 ie_entries
= ie_result
['entries']
771 if isinstance(ie_entries
, list):
772 n_all_entries
= len(ie_entries
)
775 ie_entries
[i
- 1] for i
in playlistitems
776 if -n_all_entries
<= i
- 1 < n_all_entries
]
778 entries
= ie_entries
[playliststart
:playlistend
]
779 n_entries
= len(entries
)
781 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
782 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
783 elif isinstance(ie_entries
, PagedList
):
786 for item
in playlistitems
:
787 entries
.extend(ie_entries
.getslice(
791 entries
= ie_entries
.getslice(
792 playliststart
, playlistend
)
793 n_entries
= len(entries
)
795 "[%s] playlist %s: Downloading %d videos" %
796 (ie_result
['extractor'], playlist
, n_entries
))
799 entry_list
= list(ie_entries
)
800 entries
= [entry_list
[i
- 1] for i
in playlistitems
]
802 entries
= list(itertools
.islice(
803 ie_entries
, playliststart
, playlistend
))
804 n_entries
= len(entries
)
806 "[%s] playlist %s: Downloading %d videos" %
807 (ie_result
['extractor'], playlist
, n_entries
))
809 if self
.params
.get('playlistreverse', False):
810 entries
= entries
[::-1]
812 for i
, entry
in enumerate(entries
, 1):
813 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
815 'n_entries': n_entries
,
816 'playlist': playlist
,
817 'playlist_id': ie_result
.get('id'),
818 'playlist_title': ie_result
.get('title'),
819 'playlist_index': i
+ playliststart
,
820 'extractor': ie_result
['extractor'],
821 'webpage_url': ie_result
['webpage_url'],
822 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
823 'extractor_key': ie_result
['extractor_key'],
826 reason
= self
._match
_entry
(entry
, incomplete
=True)
827 if reason
is not None:
828 self
.to_screen('[download] ' + reason
)
831 entry_result
= self
.process_ie_result(entry
,
834 playlist_results
.append(entry_result
)
835 ie_result
['entries'] = playlist_results
837 elif result_type
== 'compat_list':
839 'Extractor %s returned a compat_list result. '
840 'It needs to be updated.' % ie_result
.get('extractor'))
846 'extractor': ie_result
['extractor'],
847 'webpage_url': ie_result
['webpage_url'],
848 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
849 'extractor_key': ie_result
['extractor_key'],
853 ie_result
['entries'] = [
854 self
.process_ie_result(_fixup(r
), download
, extra_info
)
855 for r
in ie_result
['entries']
859 raise Exception('Invalid result type: %s' % result_type
)
861 def _build_format_filter(self
, filter_spec
):
862 " Returns a function to filter the formats according to the filter_spec "
872 operator_rex
= re
.compile(r
'''(?x)\s*
873 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
874 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
875 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
877 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
878 m
= operator_rex
.search(filter_spec
)
881 comparison_value
= int(m
.group('value'))
883 comparison_value
= parse_filesize(m
.group('value'))
884 if comparison_value
is None:
885 comparison_value
= parse_filesize(m
.group('value') + 'B')
886 if comparison_value
is None:
888 'Invalid value %r in format specification %r' % (
889 m
.group('value'), filter_spec
))
890 op
= OPERATORS
[m
.group('op')]
897 str_operator_rex
= re
.compile(r
'''(?x)
898 \s*(?P<key>ext|acodec|vcodec|container|protocol)
899 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
900 \s*(?P<value>[a-zA-Z0-9_-]+)
902 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
903 m
= str_operator_rex
.search(filter_spec
)
905 comparison_value
= m
.group('value')
906 op
= STR_OPERATORS
[m
.group('op')]
909 raise ValueError('Invalid filter specification %r' % filter_spec
)
912 actual_value
= f
.get(m
.group('key'))
913 if actual_value
is None:
914 return m
.group('none_inclusive')
915 return op(actual_value
, comparison_value
)
918 def build_format_selector(self
, format_spec
):
919 def syntax_error(note
, start
):
921 'Invalid format specification: '
922 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
923 return SyntaxError(message
)
925 PICKFIRST
= 'PICKFIRST'
929 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
931 def _parse_filter(tokens
):
933 for type, string
, start
, _
, _
in tokens
:
934 if type == tokenize
.OP
and string
== ']':
935 return ''.join(filter_parts
)
937 filter_parts
.append(string
)
939 def _remove_unused_ops(tokens
):
940 # Remove operators that we don't use and join them with the surrounding strings
941 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
942 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
943 last_string
, last_start
, last_end
, last_line
= None, None, None, None
944 for type, string
, start
, end
, line
in tokens
:
945 if type == tokenize
.OP
and string
== '[':
947 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
949 yield type, string
, start
, end
, line
950 # everything inside brackets will be handled by _parse_filter
951 for type, string
, start
, end
, line
in tokens
:
952 yield type, string
, start
, end
, line
953 if type == tokenize
.OP
and string
== ']':
955 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
957 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
959 yield type, string
, start
, end
, line
960 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
966 last_string
+= string
968 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
970 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
972 current_selector
= None
973 for type, string
, start
, _
, _
in tokens
:
974 # ENCODING is only defined in python 3.x
975 if type == getattr(tokenize
, 'ENCODING', None):
977 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
978 current_selector
= FormatSelector(SINGLE
, string
, [])
979 elif type == tokenize
.OP
:
982 # ')' will be handled by the parentheses group
983 tokens
.restore_last_token()
985 elif inside_merge
and string
in ['/', ',']:
986 tokens
.restore_last_token()
988 elif inside_choice
and string
== ',':
989 tokens
.restore_last_token()
992 if not current_selector
:
993 raise syntax_error('"," must follow a format selector', start
)
994 selectors
.append(current_selector
)
995 current_selector
= None
997 if not current_selector
:
998 raise syntax_error('"/" must follow a format selector', start
)
999 first_choice
= current_selector
1000 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1001 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1003 if not current_selector
:
1004 current_selector
= FormatSelector(SINGLE
, 'best', [])
1005 format_filter
= _parse_filter(tokens
)
1006 current_selector
.filters
.append(format_filter
)
1008 if current_selector
:
1009 raise syntax_error('Unexpected "("', start
)
1010 group
= _parse_format_selection(tokens
, inside_group
=True)
1011 current_selector
= FormatSelector(GROUP
, group
, [])
1013 video_selector
= current_selector
1014 audio_selector
= _parse_format_selection(tokens
, inside_merge
=True)
1015 if not video_selector
or not audio_selector
:
1016 raise syntax_error('"+" must be between two format selectors', start
)
1017 current_selector
= FormatSelector(MERGE
, (video_selector
, audio_selector
), [])
1019 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1020 elif type == tokenize
.ENDMARKER
:
1022 if current_selector
:
1023 selectors
.append(current_selector
)
1026 def _build_selector_function(selector
):
1027 if isinstance(selector
, list):
1028 fs
= [_build_selector_function(s
) for s
in selector
]
1030 def selector_function(formats
):
1032 for format
in f(formats
):
1034 return selector_function
1035 elif selector
.type == GROUP
:
1036 selector_function
= _build_selector_function(selector
.selector
)
1037 elif selector
.type == PICKFIRST
:
1038 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1040 def selector_function(formats
):
1042 picked_formats
= list(f(formats
))
1044 return picked_formats
1046 elif selector
.type == SINGLE
:
1047 format_spec
= selector
.selector
1049 def selector_function(formats
):
1050 formats
= list(formats
)
1053 if format_spec
== 'all':
1056 elif format_spec
in ['best', 'worst', None]:
1057 format_idx
= 0 if format_spec
== 'worst' else -1
1058 audiovideo_formats
= [
1060 if f
.get('vcodec') != 'none' and f
.get('acodec') != 'none']
1061 if audiovideo_formats
:
1062 yield audiovideo_formats
[format_idx
]
1063 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1064 elif (all(f
.get('acodec') != 'none' for f
in formats
) or
1065 all(f
.get('vcodec') != 'none' for f
in formats
)):
1066 yield formats
[format_idx
]
1067 elif format_spec
== 'bestaudio':
1070 if f
.get('vcodec') == 'none']
1072 yield audio_formats
[-1]
1073 elif format_spec
== 'worstaudio':
1076 if f
.get('vcodec') == 'none']
1078 yield audio_formats
[0]
1079 elif format_spec
== 'bestvideo':
1082 if f
.get('acodec') == 'none']
1084 yield video_formats
[-1]
1085 elif format_spec
== 'worstvideo':
1088 if f
.get('acodec') == 'none']
1090 yield video_formats
[0]
1092 extensions
= ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1093 if format_spec
in extensions
:
1094 filter_f
= lambda f
: f
['ext'] == format_spec
1096 filter_f
= lambda f
: f
['format_id'] == format_spec
1097 matches
= list(filter(filter_f
, formats
))
1100 elif selector
.type == MERGE
:
1101 def _merge(formats_info
):
1102 format_1
, format_2
= [f
['format_id'] for f
in formats_info
]
1103 # The first format must contain the video and the
1105 if formats_info
[0].get('vcodec') == 'none':
1106 self
.report_error('The first format must '
1107 'contain the video, try using '
1108 '"-f %s+%s"' % (format_2
, format_1
))
1111 formats_info
[0]['ext']
1112 if self
.params
.get('merge_output_format') is None
1113 else self
.params
['merge_output_format'])
1115 'requested_formats': formats_info
,
1116 'format': '%s+%s' % (formats_info
[0].get('format'),
1117 formats_info
[1].get('format')),
1118 'format_id': '%s+%s' % (formats_info
[0].get('format_id'),
1119 formats_info
[1].get('format_id')),
1120 'width': formats_info
[0].get('width'),
1121 'height': formats_info
[0].get('height'),
1122 'resolution': formats_info
[0].get('resolution'),
1123 'fps': formats_info
[0].get('fps'),
1124 'vcodec': formats_info
[0].get('vcodec'),
1125 'vbr': formats_info
[0].get('vbr'),
1126 'stretched_ratio': formats_info
[0].get('stretched_ratio'),
1127 'acodec': formats_info
[1].get('acodec'),
1128 'abr': formats_info
[1].get('abr'),
1131 video_selector
, audio_selector
= map(_build_selector_function
, selector
.selector
)
1133 def selector_function(formats
):
1134 formats
= list(formats
)
1135 for pair
in itertools
.product(video_selector(formats
), audio_selector(formats
)):
1138 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1140 def final_selector(formats
):
1141 for _filter
in filters
:
1142 formats
= list(filter(_filter
, formats
))
1143 return selector_function(formats
)
1144 return final_selector
1146 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1148 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1149 except tokenize
.TokenError
:
1150 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1152 class TokenIterator(object):
1153 def __init__(self
, tokens
):
1154 self
.tokens
= tokens
1161 if self
.counter
>= len(self
.tokens
):
1162 raise StopIteration()
1163 value
= self
.tokens
[self
.counter
]
1169 def restore_last_token(self
):
1172 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1173 return _build_selector_function(parsed_selector
)
def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for ``info_dict``.

    Starts from a copy of ``std_headers``, overlays the ``http_headers``
    entry of ``info_dict`` (when it is non-empty) and finally sets the
    ``Cookie`` header when ``_calc_cookies`` returns a non-empty value.

    Returns the resulting headers mapping; ``std_headers`` itself is not
    modified because only the copy is updated.
    """
    headers = std_headers.copy()

    # A missing or empty 'http_headers' entry leaves the defaults untouched.
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    return headers
def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header the cookie jar attaches for ``info_dict['url']``.

    A request object is created only so that ``self.cookiejar`` can add its
    cookie header to it; the request is never opened here.
    """
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_value = probe_request.get_header('Cookie')
    return cookie_value
1193 def process_video_result(self
, info_dict
, download
=True):
1194 assert info_dict
.get('_type', 'video') == 'video'
1196 if 'id' not in info_dict
:
1197 raise ExtractorError('Missing "id" field in extractor result')
1198 if 'title' not in info_dict
:
1199 raise ExtractorError('Missing "title" field in extractor result')
1201 if 'playlist' not in info_dict
:
1202 # It isn't part of a playlist
1203 info_dict
['playlist'] = None
1204 info_dict
['playlist_index'] = None
1206 thumbnails
= info_dict
.get('thumbnails')
1207 if thumbnails
is None:
1208 thumbnail
= info_dict
.get('thumbnail')
1210 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail
}]
1212 thumbnails
.sort(key
=lambda t
: (
1213 t
.get('preference'), t
.get('width'), t
.get('height'),
1214 t
.get('id'), t
.get('url')))
1215 for i
, t
in enumerate(thumbnails
):
1216 if t
.get('width') and t
.get('height'):
1217 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1218 if t
.get('id') is None:
1221 if thumbnails
and 'thumbnail' not in info_dict
:
1222 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1224 if 'display_id' not in info_dict
and 'id' in info_dict
:
1225 info_dict
['display_id'] = info_dict
['id']
1227 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1228 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1229 # see http://bugs.python.org/issue1646728)
1231 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1232 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1233 except (ValueError, OverflowError, OSError):
1236 subtitles
= info_dict
.get('subtitles')
1238 for _
, subtitle
in subtitles
.items():
1239 for subtitle_format
in subtitle
:
1240 if 'ext' not in subtitle_format
:
1241 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1243 if self
.params
.get('listsubtitles', False):
1244 if 'automatic_captions' in info_dict
:
1245 self
.list_subtitles(info_dict
['id'], info_dict
.get('automatic_captions'), 'automatic captions')
1246 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1248 info_dict
['requested_subtitles'] = self
.process_subtitles(
1249 info_dict
['id'], subtitles
,
1250 info_dict
.get('automatic_captions'))
1252 # We now pick which formats have to be downloaded
1253 if info_dict
.get('formats') is None:
1254 # There's only one format available
1255 formats
= [info_dict
]
1257 formats
= info_dict
['formats']
1260 raise ExtractorError('No video formats found!')
1264 # We check that all the formats have the format and format_id fields
1265 for i
, format
in enumerate(formats
):
1266 if 'url' not in format
:
1267 raise ExtractorError('Missing "url" key in result (index %d)' % i
)
1269 if format
.get('format_id') is None:
1270 format
['format_id'] = compat_str(i
)
1271 format_id
= format
['format_id']
1272 if format_id
not in formats_dict
:
1273 formats_dict
[format_id
] = []
1274 formats_dict
[format_id
].append(format
)
1276 # Make sure all formats have unique format_id
1277 for format_id
, ambiguous_formats
in formats_dict
.items():
1278 if len(ambiguous_formats
) > 1:
1279 for i
, format
in enumerate(ambiguous_formats
):
1280 format
['format_id'] = '%s-%d' % (format_id
, i
)
1282 for i
, format
in enumerate(formats
):
1283 if format
.get('format') is None:
1284 format
['format'] = '{id} - {res}{note}'.format(
1285 id=format
['format_id'],
1286 res
=self
.format_resolution(format
),
1287 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1289 # Automatically determine file extension if missing
1290 if 'ext' not in format
:
1291 format
['ext'] = determine_ext(format
['url']).lower()
1292 # Add HTTP headers, so that external programs can use them from the
1294 full_format_info
= info_dict
.copy()
1295 full_format_info
.update(format
)
1296 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1298 # TODO Central sorting goes here
1300 if formats
[0] is not info_dict
:
1301 # only set the 'formats' fields if the original info_dict list them
1302 # otherwise we end up with a circular reference, the first (and unique)
1303 # element in the 'formats' field in info_dict is info_dict itself,
1304 # which can't be exported to json
1305 info_dict
['formats'] = formats
1306 if self
.params
.get('listformats'):
1307 self
.list_formats(info_dict
)
1309 if self
.params
.get('list_thumbnails'):
1310 self
.list_thumbnails(info_dict
)
1313 req_format
= self
.params
.get('format')
1314 if req_format
is None:
1315 req_format_list
= []
1316 if (self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) != '-' and
1317 info_dict
['extractor'] in ['youtube', 'ted'] and
1318 not info_dict
.get('is_live')):
1319 merger
= FFmpegMergerPP(self
)
1320 if merger
.available
and merger
.can_merge():
1321 req_format_list
.append('bestvideo+bestaudio')
1322 req_format_list
.append('best')
1323 req_format
= '/'.join(req_format_list
)
1324 format_selector
= self
.build_format_selector(req_format
)
1325 formats_to_download
= list(format_selector(formats
))
1326 if not formats_to_download
:
1327 raise ExtractorError('requested format not available',
1331 if len(formats_to_download
) > 1:
1332 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1333 for format
in formats_to_download
:
1334 new_info
= dict(info_dict
)
1335 new_info
.update(format
)
1336 self
.process_info(new_info
)
1337 # We update the info dict with the best quality format (backwards compatibility)
1338 info_dict
.update(formats_to_download
[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format"""
    params = self.params
    want_subs = params.get('writesubtitles')
    want_auto = params.get('writeautomaticsub')

    # Regular subtitles win over automatic captions for the same language.
    candidates = {}
    if normal_subtitles and want_subs:
        candidates.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, cap_info in automatic_captions.items():
            if lang not in candidates:
                candidates[lang] = cap_info

    if not (want_subs or want_auto) or not candidates:
        return None

    # Language choice: everything, the explicit list, English, or the
    # first language that happens to be available.
    if params.get('allsubtitles', False):
        requested_langs = candidates.keys()
    elif params.get('subtitleslangs', False):
        requested_langs = params.get('subtitleslangs')
    elif 'en' in candidates:
        requested_langs = ['en']
    else:
        requested_langs = [list(candidates.keys())[0]]

    formats_query = params.get('subtitlesformat', 'best')
    preferred_exts = formats_query.split('/') if formats_query else []

    selected = {}
    for lang in requested_langs:
        formats = candidates.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in preferred_exts:
            if ext == 'best':
                chosen = formats[-1]
                break
            matching = [fmt for fmt in formats if fmt['ext'] == ext]
            if matching:
                chosen = matching[-1]
                break
        else:
            # None of the preferred extensions matched: fall back to the
            # last listed format and tell the user about it.
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        selected[lang] = chosen
    return selected
1390 def process_info(self
, info_dict
):
1391 """Process a single resolved IE result."""
1393 assert info_dict
.get('_type', 'video') == 'video'
1395 max_downloads
= self
.params
.get('max_downloads')
1396 if max_downloads
is not None:
1397 if self
._num
_downloads
>= int(max_downloads
):
1398 raise MaxDownloadsReached()
1400 info_dict
['fulltitle'] = info_dict
['title']
1401 if len(info_dict
['title']) > 200:
1402 info_dict
['title'] = info_dict
['title'][:197] + '...'
1404 if 'format' not in info_dict
:
1405 info_dict
['format'] = info_dict
['ext']
1407 reason
= self
._match
_entry
(info_dict
, incomplete
=False)
1408 if reason
is not None:
1409 self
.to_screen('[download] ' + reason
)
1412 self
._num
_downloads
+= 1
1414 info_dict
['_filename'] = filename
= self
.prepare_filename(info_dict
)
1417 if self
.params
.get('forcetitle', False):
1418 self
.to_stdout(info_dict
['fulltitle'])
1419 if self
.params
.get('forceid', False):
1420 self
.to_stdout(info_dict
['id'])
1421 if self
.params
.get('forceurl', False):
1422 if info_dict
.get('requested_formats') is not None:
1423 for f
in info_dict
['requested_formats']:
1424 self
.to_stdout(f
['url'] + f
.get('play_path', ''))
1426 # For RTMP URLs, also include the playpath
1427 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', ''))
1428 if self
.params
.get('forcethumbnail', False) and info_dict
.get('thumbnail') is not None:
1429 self
.to_stdout(info_dict
['thumbnail'])
1430 if self
.params
.get('forcedescription', False) and info_dict
.get('description') is not None:
1431 self
.to_stdout(info_dict
['description'])
1432 if self
.params
.get('forcefilename', False) and filename
is not None:
1433 self
.to_stdout(filename
)
1434 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
1435 self
.to_stdout(formatSeconds(info_dict
['duration']))
1436 if self
.params
.get('forceformat', False):
1437 self
.to_stdout(info_dict
['format'])
1438 if self
.params
.get('forcejson', False):
1439 self
.to_stdout(json
.dumps(info_dict
))
1441 # Do nothing else if in simulate mode
1442 if self
.params
.get('simulate', False):
1445 if filename
is None:
1449 dn
= os
.path
.dirname(sanitize_path(encodeFilename(filename
)))
1450 if dn
and not os
.path
.exists(dn
):
1452 except (OSError, IOError) as err
:
1453 self
.report_error('unable to create directory ' + compat_str(err
))
1456 if self
.params
.get('writedescription', False):
1457 descfn
= replace_extension(filename
, 'description', info_dict
.get('ext'))
1458 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(descfn
)):
1459 self
.to_screen('[info] Video description is already present')
1460 elif info_dict
.get('description') is None:
1461 self
.report_warning('There\'s no description to write.')
1464 self
.to_screen('[info] Writing video description to: ' + descfn
)
1465 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1466 descfile
.write(info_dict
['description'])
1467 except (OSError, IOError):
1468 self
.report_error('Cannot write description file ' + descfn
)
1471 if self
.params
.get('writeannotations', False):
1472 annofn
= replace_extension(filename
, 'annotations.xml', info_dict
.get('ext'))
1473 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(annofn
)):
1474 self
.to_screen('[info] Video annotations are already present')
1477 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
1478 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
1479 annofile
.write(info_dict
['annotations'])
1480 except (KeyError, TypeError):
1481 self
.report_warning('There are no annotations to write.')
1482 except (OSError, IOError):
1483 self
.report_error('Cannot write annotations file: ' + annofn
)
1486 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
1487 self
.params
.get('writeautomaticsub')])
1489 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
1490 # subtitles download errors are already managed as troubles in relevant IE
1491 # that way it will silently go on when used with unsupporting IE
1492 subtitles
= info_dict
['requested_subtitles']
1493 ie
= self
.get_info_extractor(info_dict
['extractor_key'])
1494 for sub_lang
, sub_info
in subtitles
.items():
1495 sub_format
= sub_info
['ext']
1496 if sub_info
.get('data') is not None:
1497 sub_data
= sub_info
['data']
1500 sub_data
= ie
._download
_webpage
(
1501 sub_info
['url'], info_dict
['id'], note
=False)
1502 except ExtractorError
as err
:
1503 self
.report_warning('Unable to download subtitle for "%s": %s' %
1504 (sub_lang
, compat_str(err
.cause
)))
1507 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
)
1508 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(sub_filename
)):
1509 self
.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang
, sub_format
))
1511 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
1512 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8') as subfile
:
1513 subfile
.write(sub_data
)
1514 except (OSError, IOError):
1515 self
.report_error('Cannot write subtitles file ' + sub_filename
)
1518 if self
.params
.get('writeinfojson', False):
1519 infofn
= replace_extension(filename
, 'info.json', info_dict
.get('ext'))
1520 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(infofn
)):
1521 self
.to_screen('[info] Video description metadata is already present')
1523 self
.to_screen('[info] Writing video description metadata as JSON to: ' + infofn
)
1525 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
1526 except (OSError, IOError):
1527 self
.report_error('Cannot write metadata to JSON file ' + infofn
)
1530 self
._write
_thumbnails
(info_dict
, filename
)
1532 if not self
.params
.get('skip_download', False):
1535 fd
= get_suitable_downloader(info
, self
.params
)(self
, self
.params
)
1536 for ph
in self
._progress
_hooks
:
1537 fd
.add_progress_hook(ph
)
1538 if self
.params
.get('verbose'):
1539 self
.to_stdout('[debug] Invoking downloader on %r' % info
.get('url'))
1540 return fd
.download(name
, info
)
1542 if info_dict
.get('requested_formats') is not None:
1545 merger
= FFmpegMergerPP(self
)
1546 if not merger
.available
:
1548 self
.report_warning('You have requested multiple '
1549 'formats but ffmpeg or avconv are not installed.'
1550 ' The formats won\'t be merged.')
1552 postprocessors
= [merger
]
1554 def compatible_formats(formats
):
1555 video
, audio
= formats
1557 video_ext
, audio_ext
= audio
.get('ext'), video
.get('ext')
1558 if video_ext
and audio_ext
:
1560 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1563 for exts
in COMPATIBLE_EXTS
:
1564 if video_ext
in exts
and audio_ext
in exts
:
1566 # TODO: Check acodec/vcodec
1569 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
1571 os
.path
.splitext(filename
)[0]
1572 if filename_real_ext
== info_dict
['ext']
1574 requested_formats
= info_dict
['requested_formats']
1575 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
1576 info_dict
['ext'] = 'mkv'
1577 self
.report_warning(
1578 'Requested formats are incompatible for merge and will be merged into mkv.')
1579 # Ensure filename always has a correct extension for successful merge
1580 filename
= '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
1581 if os
.path
.exists(encodeFilename(filename
)):
1583 '[download] %s has already been downloaded and '
1584 'merged' % filename
)
1586 for f
in requested_formats
:
1587 new_info
= dict(info_dict
)
1589 fname
= self
.prepare_filename(new_info
)
1590 fname
= prepend_extension(fname
, 'f%s' % f
['format_id'], new_info
['ext'])
1591 downloaded
.append(fname
)
1592 partial_success
= dl(fname
, new_info
)
1593 success
= success
and partial_success
1594 info_dict
['__postprocessors'] = postprocessors
1595 info_dict
['__files_to_merge'] = downloaded
1597 # Just a single file
1598 success
= dl(filename
, info_dict
)
1599 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
1600 self
.report_error('unable to download video data: %s' % str(err
))
1602 except (OSError, IOError) as err
:
1603 raise UnavailableVideoError(err
)
1604 except (ContentTooShortError
, ) as err
:
1605 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
1610 fixup_policy
= self
.params
.get('fixup')
1611 if fixup_policy
is None:
1612 fixup_policy
= 'detect_or_warn'
1614 stretched_ratio
= info_dict
.get('stretched_ratio')
1615 if stretched_ratio
is not None and stretched_ratio
!= 1:
1616 if fixup_policy
== 'warn':
1617 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1618 info_dict
['id'], stretched_ratio
))
1619 elif fixup_policy
== 'detect_or_warn':
1620 stretched_pp
= FFmpegFixupStretchedPP(self
)
1621 if stretched_pp
.available
:
1622 info_dict
.setdefault('__postprocessors', [])
1623 info_dict
['__postprocessors'].append(stretched_pp
)
1625 self
.report_warning(
1626 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1627 info_dict
['id'], stretched_ratio
))
1629 assert fixup_policy
in ('ignore', 'never')
1631 if info_dict
.get('requested_formats') is None and info_dict
.get('container') == 'm4a_dash':
1632 if fixup_policy
== 'warn':
1633 self
.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1635 elif fixup_policy
== 'detect_or_warn':
1636 fixup_pp
= FFmpegFixupM4aPP(self
)
1637 if fixup_pp
.available
:
1638 info_dict
.setdefault('__postprocessors', [])
1639 info_dict
['__postprocessors'].append(fixup_pp
)
1641 self
.report_warning(
1642 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1645 assert fixup_policy
in ('ignore', 'never')
1648 self
.post_process(filename
, info_dict
)
1649 except (PostProcessingError
) as err
:
1650 self
.report_error('postprocessing: %s' % str(err
))
1652 self
.record_download_archive(info_dict
)
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Several URLs with a template that has no '%' field would all be
    # written to the same file (unless at most one download is allowed).
    same_file_for_all = (
        len(url_list) > 1 and
        '%' not in outtmpl and
        self.params.get('max_downloads') != 1)
    if same_file_for_all:
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url,
                force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as if it had just been extracted.

    The file is read as UTF-8, stripped of the 'requested_*' keys via
    ``filter_requested_info`` and handed to ``process_ie_result``. If that
    raises ``DownloadError`` and the info carries a ``webpage_url``, the
    download is retried from that URL; otherwise the error propagates.

    Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])

    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of ``info_dict`` without the 'requested_*' selection keys.

    The keys 'requested_formats' and 'requested_subtitles' are dropped;
    every other item is carried over unchanged. ``info_dict`` itself is not
    modified.
    """
    dropped_keys = ['requested_formats', 'requested_subtitles']
    filtered = {}
    for key, value in info_dict.items():
        if key in dropped_keys:
            continue
        filtered[key] = value
    return filtered
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename

    # Postprocessors attached to this particular download run first,
    # followed by the ones registered on this object.
    per_download_pps = ie_info.get('__postprocessors')
    pps_chain = [] if per_download_pps is None else list(per_download_pps)
    pps_chain.extend(self._pps)

    for pp in pps_chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            # Report and carry on with the next postprocessor.
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue

        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
1723 def _make_archive_id(self
, info_dict
):
1724 # Future-proof against any change in case
1725 # and backwards compatibility with prior versions
1726 extractor
= info_dict
.get('extractor_key')
1727 if extractor
is None:
1728 if 'id' in info_dict
:
1729 extractor
= info_dict
.get('ie_key') # key in a playlist
1730 if extractor
is None:
1731 return None # Incomplete video information
1732 return extractor
.lower() + ' ' + info_dict
['id']
def in_download_archive(self, info_dict):
    """Tell whether ``info_dict`` is already listed in the download archive.

    Returns False when no 'download_archive' file is configured, when no
    archive id can be built for ``info_dict``, or when the archive file
    does not exist. Any other IOError while reading is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            # One id per line; stop at the first match.
            return any(line.strip() == archive_id for line in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of ``info_dict`` to the download archive file.

    Does nothing when no 'download_archive' file is configured. The id
    produced by ``_make_archive_id`` is asserted to be non-empty before it
    is written, one id per line, in append mode.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    entry = archive_id + '\n'
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Checked in order:
      * ``vcodec == 'none'``          -> 'audio only'
      * explicit ``resolution``       -> that value, unchanged
      * ``width`` and ``height``      -> 'WIDTHxHEIGHT'
      * only ``height``               -> 'HEIGHTp'
      * only ``width``                -> 'WIDTHx?'
      * nothing usable                -> ``default``
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Keep the WIDTHxHEIGHT order: the unknown height is the '?',
        # not the width (the value used to be printed in the height slot).
        return '%dx?' % width
    return default
1779 def _format_note(self
, fdict
):
1781 if fdict
.get('ext') in ['f4f', 'f4m']:
1782 res
+= '(unsupported) '
1783 if fdict
.get('format_note') is not None:
1784 res
+= fdict
['format_note'] + ' '
1785 if fdict
.get('tbr') is not None:
1786 res
+= '%4dk ' % fdict
['tbr']
1787 if fdict
.get('container') is not None:
1790 res
+= '%s container' % fdict
['container']
1791 if (fdict
.get('vcodec') is not None and
1792 fdict
.get('vcodec') != 'none'):
1795 res
+= fdict
['vcodec']
1796 if fdict
.get('vbr') is not None:
1798 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
1800 if fdict
.get('vbr') is not None:
1801 res
+= '%4dk' % fdict
['vbr']
1802 if fdict
.get('fps') is not None:
1803 res
+= ', %sfps' % fdict
['fps']
1804 if fdict
.get('acodec') is not None:
1807 if fdict
['acodec'] == 'none':
1810 res
+= '%-5s' % fdict
['acodec']
1811 elif fdict
.get('abr') is not None:
1815 if fdict
.get('abr') is not None:
1816 res
+= '@%3dk' % fdict
['abr']
1817 if fdict
.get('asr') is not None:
1818 res
+= ' (%5dHz)' % fdict
['asr']
1819 if fdict
.get('filesize') is not None:
1822 res
+= format_bytes(fdict
['filesize'])
1823 elif fdict
.get('filesize_approx') is not None:
1826 res
+= '~' + format_bytes(fdict
['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the formats available for ``info_dict`` via ``to_screen``.

    Uses ``info_dict['formats']`` or, when that key is absent, treats
    ``info_dict`` itself as the only format. Formats whose 'preference' is
    below -1000 are left out of the table. When more than one format
    exists, the last listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter may have removed every row; only tag the last
    # row when there is one, instead of failing with an IndexError.
    if table and len(formats) > 1:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for ``info_dict`` via ``to_screen``.

    When ``info_dict`` has no 'thumbnails' list, its single 'thumbnail' URL
    (if any) is shown as a one-row table with id '0'. When neither is
    present, a "no thumbnails" message is printed instead.
    """
    video_id = info_dict['id'] if False else None  # placeholder removed below
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': single_url}]

    rows = [
        [t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
        for t in thumbnails]
    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the subtitle languages and formats via ``to_screen``.

    ``subtitles`` maps a language to a list of format dicts, each with an
    'ext' key; the extensions of a language are listed in reverse order.
    ``name`` is the label used in the printed messages. When ``subtitles``
    is empty, only a "has no ..." message is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    rows = []
    for lang, formats in subtitles.items():
        extensions = ', '.join(f['ext'] for f in reversed(formats))
        rows.append([lang, extensions])

    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    self.to_screen(render_table(['Language', 'formats'], rows))
def urlopen(self, req):
    """ Start an HTTP download """
    # Every request goes through the shared opener with the configured socket timeout.
    opener = self._opener
    response = opener.open(req, timeout=self._socket_timeout)
    return response
def print_debug_header(self):
    """Write the diagnostic header shown in verbose mode.

    Returns immediately unless the 'verbose' param is truthy. Otherwise
    it writes, in order: the encodings in use, the program version, the
    git HEAD of the source directory (best effort), the Python version
    and platform, the versions of the external executables, and the
    proxy map of the opener's handlers. When the 'call_home' param is
    set it additionally fetches and prints the public IP address and
    warns if a newer version is available.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may be replaced by an object without an 'encoding'
    # attribute; report the type name of that object instead.
    out_enc = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_enc,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')

    # Best effort only: any failure while querying git is swallowed.
    try:
        source_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=source_dir)
        git_out, _ = git.communicate()
        head = git_out.decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        try:
            # sys.exc_clear() only exists on Python 2.
            sys.exc_clear()
        except Exception:
            pass

    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    tool_versions = FFmpegPostProcessor.get_versions(self)
    tool_versions['rtmpdump'] = rtmpdump_version()
    # Only executables with a truthy version are listed.
    found = [
        '%s %s' % (tool, ver)
        for tool, ver in sorted(tool_versions.items())
        if ver
    ]
    self._write_string(
        '[debug] exe versions: %s\n' % (', '.join(found) or 'none'))

    # Merge the 'proxies' mapping of every handler that has one.
    merged_proxies = {}
    for opener_handler in self._opener.handlers:
        if hasattr(opener_handler, 'proxies'):
            merged_proxies.update(opener_handler.proxies)
    self._write_string(
        '[debug] Proxy map: ' + compat_str(merged_proxies) + '\n')

    if not self.params.get('call_home', False):
        return

    public_ip = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % public_ip)
    newest = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(newest) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            newest)
def _setup_opener(self):
    """Create the cookie jar and the URL opener used for all requests.

    Reads these params:
      socket_timeout -- stored as a float in self._socket_timeout
                        (600 when the param is None).
      cookiefile     -- when None an in-memory CookieJar is used;
                        otherwise a MozillaCookieJar bound to that path,
                        loaded if the file is readable.
      proxy          -- None: use the proxies reported by getproxies();
                        '': use no proxies; any other value: use it for
                        both http and https.
      debug_printtraffic -- truthy enables debuglevel 1 on the handlers.

    Sets self.cookiejar and self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            # load() skips cookies marked "discard" (session cookies) by
            # default; a cookie file supplied by the user may rely on
            # them, so keep them.
            self.cookiejar.load(ignore_discard=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicitly empty proxy option disables proxying.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    data_handler = compat_urllib_request_DataHandler()
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, data_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Return s as bytes.

    A bytes argument is returned unchanged. Anything else is encoded
    with the encoding reported by self.get_encoding().

    Raises UnicodeEncodeError when s cannot be represented in that
    encoding; the error's reason is extended with a hint about the
    --encoding option before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason = exc.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise

    return s  # Already encoded
def get_encoding(self):
    """Return the value of the 'encoding' param.

    When that param is None (or absent) the result of
    preferredencoding() is returned instead.
    """
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
1997 def _write_thumbnails(self
, info_dict
, filename
):
1998 if self
.params
.get('writethumbnail', False):
1999 thumbnails
= info_dict
.get('thumbnails')
2001 thumbnails
= [thumbnails
[-1]]
2002 elif self
.params
.get('write_all_thumbnails', False):
2003 thumbnails
= info_dict
.get('thumbnails')
2008 # No thumbnails present, so return immediately
2011 for t
in thumbnails
:
2012 thumb_ext
= determine_ext(t
['url'], 'jpg')
2013 suffix
= '_%s' % t
['id'] if len(thumbnails
) > 1 else ''
2014 thumb_display_id
= '%s ' % t
['id'] if len(thumbnails
) > 1 else ''
2015 t
['filename'] = thumb_filename
= os
.path
.splitext(filename
)[0] + suffix
+ '.' + thumb_ext
2017 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(thumb_filename
)):
2018 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2019 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2021 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2022 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2024 uf
= self
.urlopen(t
['url'])
2025 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2026 shutil
.copyfileobj(uf
, thumbf
)
2027 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2028 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2029 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2030 self
.report_warning('Unable to download thumbnail "%s": %s' %
2031 (t
['url'], compat_str(err
)))