]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
, unicode_literals
33 compat_urllib_request
,
59 UnavailableVideoError
,
69 from . cache
import Cache
70 from . extractor
import get_info_extractor
, gen_extractors
71 from . downloader
import get_suitable_downloader
72 from . downloader
. rtmp
import rtmpdump_version
73 from . postprocessor
import (
74 FFmpegFixupStretchedPP
,
79 from . version
import __version__
82 class YoutubeDL ( object ):
85 YoutubeDL objects are the ones responsible of downloading the
86 actual video file and writing it to disk if the user has requested
87 it, among some other tasks. In most cases there should be one per
88 program. As, given a video URL, the downloader doesn't know how to
89 extract all the needed information, task that InfoExtractors do, it
90 has to pass the URL to one of them.
92 For this, YoutubeDL objects have a method that allows
93 InfoExtractors to be registered in a given order. When it is passed
94 a URL, the YoutubeDL object hands it to the first InfoExtractor it
95 finds that reports being able to handle it. The InfoExtractor extracts
96 all the information about the video or videos the URL refers to, and
97 YoutubeDL processes the extracted information, possibly using a File
98 Downloader to download the video.
100 YoutubeDL objects accept a lot of parameters. In order not to saturate
101 the object constructor with arguments, it receives a dictionary of
102 options instead. These options are available through the params
103 attribute for the InfoExtractors to use. The YoutubeDL also
104 registers itself as the downloader in charge for the InfoExtractors
105 that are added to it, so this is a "mutual registration".
109 username: Username for authentication purposes.
110 password: Password for authentication purposes.
111 videopassword: Password for accessing a video.
112 usenetrc: Use netrc for authentication instead.
113 verbose: Print additional info to stdout.
114 quiet: Do not print messages to stdout.
115 no_warnings: Do not print out anything for warnings.
116 forceurl: Force printing final URL.
117 forcetitle: Force printing title.
118 forceid: Force printing ID.
119 forcethumbnail: Force printing thumbnail URL.
120 forcedescription: Force printing description.
121 forcefilename: Force printing final filename.
122 forceduration: Force printing duration.
123 forcejson: Force printing info_dict as JSON.
124 dump_single_json: Force printing the info_dict of the whole playlist
125 (or video) as a single JSON line.
126 simulate: Do not download the video files.
127 format: Video format code. See options.py for more information.
128 format_limit: Highest quality format to try.
129 outtmpl: Template for output names.
130 restrictfilenames: Do not allow "&" and spaces in file names
131 ignoreerrors: Do not stop on download errors.
132 nooverwrites: Prevent overwriting files.
133 playliststart: Playlist item to start at.
134 playlistend: Playlist item to end at.
135 playlistreverse: Download playlist items in reverse order.
136 matchtitle: Download only matching titles.
137 rejecttitle: Reject downloads for matching titles.
138 logger: Log messages to a logging.Logger instance.
139 logtostderr: Log messages to stderr instead of stdout.
140 writedescription: Write the video description to a .description file
141 writeinfojson: Write the video description to a .info.json file
142 writeannotations: Write the video annotations to a .annotations.xml file
143 writethumbnail: Write the thumbnail image to a file
144 writesubtitles: Write the video subtitles to a file
145 writeautomaticsub: Write the automatic subtitles to a file
146 allsubtitles: Downloads all the subtitles of the video
147 (requires writesubtitles or writeautomaticsub)
148 listsubtitles: Lists all available subtitles for the video
149 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
150 subtitleslangs: List of languages of the subtitles to download
151 keepvideo: Keep the video file after post-processing
152 daterange: A DateRange object, download only if the upload_date is in the range.
153 skip_download: Skip the actual download of the video file
154 cachedir: Location of the cache files in the filesystem.
155 False to disable filesystem cache.
156 noplaylist: Download single video instead of a playlist if in doubt.
157 age_limit: An integer representing the user's age in years.
158 Unsuitable videos for the given age are skipped.
159 min_views: An integer representing the minimum view count the video
160 must have in order to not be skipped.
161 Videos without view count information are always
162 downloaded. None for no limit.
163 max_views: An integer representing the maximum view count.
164 Videos that are more popular than that are not
166 Videos without view count information are always
167 downloaded. None for no limit.
168 download_archive: File name of a file where all downloads are recorded.
169 Videos already present in the file are not downloaded
171 cookiefile: File name where cookies should be read from and dumped to.
172 nocheckcertificate:Do not verify SSL certificates
173 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
174 At the moment, this is only supported by YouTube.
175 proxy: URL of the proxy server to use
176 socket_timeout: Time to wait for unresponsive hosts, in seconds
177 bidi_workaround: Work around buggy terminals without bidirectional text
178 support, using fribidi
179 debug_printtraffic:Print out sent and received HTTP traffic
180 include_ads: Download ads as well
181 default_search: Prepend this string if an input url is not valid.
182 'auto' for elaborate guessing
183 encoding: Use this encoding instead of the system-specified.
184 extract_flat: Do not resolve URLs, return the immediate result.
185 Pass in 'in_playlist' to only show this behavior for
187 postprocessors: A list of dictionaries, each with an entry
188 * key: The name of the postprocessor. See
189 youtube_dl/postprocessor/__init__.py for a list.
190 as well as any further keyword arguments for the
192 progress_hooks: A list of functions that get called on download
193 progress, with a dictionary with the entries
194 * filename: The final filename
195 * status: One of "downloading" and "finished"
197 The dict may also have some of the following entries:
199 * downloaded_bytes: Bytes on disk
200 * total_bytes: Size of the whole file, None if unknown
201 * tmpfilename: The filename we're currently writing to
202 * eta: The estimated time in seconds, None if unknown
203 * speed: The download speed in bytes/second, None if
206 Progress hooks are guaranteed to be called at least once
207 (with status "finished") if the download is successful.
208 merge_output_format: Extension to use when merging formats.
209 fixup: Automatically correct known faults of the file.
211 - "never": do nothing
212 - "warn": only emit a warning
213 - "detect_or_warn": check whether we can do anything
214 about it, warn otherwise
215 source_address: (Experimental) Client-side IP address to bind to.
216 call_home: Boolean, true iff we are allowed to contact the
217 youtube-dl servers for debugging.
220 The following parameters are not used by YoutubeDL itself, they are used by
222 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
223 noresizebuffer, retries, continuedl, noprogress, consoletitle
225 The following options are used by the post processors:
226 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
227 otherwise prefer avconv.
228 exec_cmd: Arbitrary command to run after downloading
234 _download_retcode
= None
235 _num_downloads
= None
238 def __init__ ( self
, params
= None , auto_init
= True ):
239 """Create a FileDownloader object with the given options."""
243 self
._ ies
_ instances
= {}
245 self
._ progress
_ hooks
= []
246 self
._ download
_ retcode
= 0
247 self
._ num
_ downloads
= 0
248 self
._ screen
_ file
= [ sys
. stdout
, sys
. stderr
][ params
. get ( 'logtostderr' , False )]
249 self
._ err
_ file
= sys
. stderr
251 self
. cache
= Cache ( self
)
253 if params
. get ( 'bidi_workaround' , False ):
256 master
, slave
= pty
. openpty ()
257 width
= get_term_width ()
261 width_args
= [ '-w' , str ( width
)]
263 stdin
= subprocess
. PIPE
,
265 stderr
= self
._ err
_ file
)
267 self
._ output
_ process
= subprocess
. Popen (
268 [ 'bidiv' ] + width_args
, ** sp_kwargs
271 self
._ output
_ process
= subprocess
. Popen (
272 [ 'fribidi' , '-c' , 'UTF-8' ] + width_args
, ** sp_kwargs
)
273 self
._ output
_ channel
= os
. fdopen ( master
, 'rb' )
274 except OSError as ose
:
276 self
. report_warning ( 'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.' )
280 if ( sys
. version_info
>= ( 3 ,) and sys
. platform
!= 'win32' and
281 sys
. getfilesystemencoding () in [ 'ascii' , 'ANSI_X3.4-1968' ]
282 and not params
. get ( 'restrictfilenames' , False )):
283 # On Python 3, the Unicode filesystem API will throw errors (#1474)
285 'Assuming --restrict-filenames since file system encoding '
286 'cannot encode all characters. '
287 'Set the LC_ALL environment variable to fix this.' )
288 self
. params
[ 'restrictfilenames' ] = True
290 if ' %(stitle)s ' in self
. params
. get ( 'outtmpl' , '' ):
291 self
. report_warning ( ' %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.' )
296 self
. print_debug_header ()
297 self
. add_default_info_extractors ()
299 for pp_def_raw
in self
. params
. get ( 'postprocessors' , []):
300 pp_class
= get_postprocessor ( pp_def_raw
[ 'key' ])
301 pp_def
= dict ( pp_def_raw
)
303 pp
= pp_class ( self
, ** compat_kwargs ( pp_def
))
304 self
. add_post_processor ( pp
)
306 for ph
in self
. params
. get ( 'progress_hooks' , []):
307 self
. add_progress_hook ( ph
)
309 def warn_if_short_id ( self
, argv
):
310 # short YouTube ID starting with dash?
312 i
for i
, a
in enumerate ( argv
)
313 if re
. match ( r
'^-[0-9A-Za-z_-] {10} $' , a
)]
317 [ a
for i
, a
in enumerate ( argv
) if i
not in idxs
] +
318 [ '--' ] + [ argv
[ i
] for i
in idxs
]
321 'Long argument string detected. '
322 'Use -- to separate parameters and URLs, like this: \n %s \n ' %
323 args_to_str ( correct_argv
))
325 def add_info_extractor ( self
, ie
):
326 """Add an InfoExtractor object to the end of the list."""
328 self
._ ies
_ instances
[ ie
. ie_key ()] = ie
329 ie
. set_downloader ( self
)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # No cached instance yet: look up the extractor class by key,
        # instantiate it and register it with this downloader.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    # Each extractor is registered through add_info_extractor(), in the
    # order gen_extractors() yields them.
    for ie in gen_extractors():
        self.add_info_extractor(ie)
350 def add_post_processor ( self
, pp
):
351 """Add a PostProcessor object to the end of the chain."""
353 pp
. set_downloader ( self
)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader).

    ph is appended to self._progress_hooks; nothing is returned.
    """
    self._progress_hooks.append(ph)
359 def _bidi_workaround ( self
, message
):
360 if not hasattr ( self
, '_output_channel' ):
363 assert hasattr ( self
, '_output_process' )
364 assert isinstance ( message
, compat_str
)
365 line_count
= message
. count ( ' \n ' ) + 1
366 self
._ output
_ process
. stdin
. write (( message
+ ' \n ' ). encode ( 'utf-8' ))
367 self
._ output
_ process
. stdin
. flush ()
368 res
= '' . join ( self
._ output
_ channel
. readline (). decode ( 'utf-8' )
369 for _
in range ( line_count
))
370 return res
[:- len ( ' \n ' )]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    Delegates to to_stdout() with check_quiet=True and returns whatever
    that call returns.
    """
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the configured encoding.

    The encoding is read from the 'encoding' entry of self.params and is
    None when that entry is absent.
    """
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode.

    If the 'logger' param is set, the message is passed to its debug()
    method and nothing is written. Otherwise the message is written to
    self._screen_file, unless check_quiet is true and the 'quiet' param
    is set. A newline is appended unless skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        # Conditional expression instead of indexing a list with a bool.
        terminator = '' if skip_eol else '\n'
        output = message + terminator

        self._write_string(output, self._screen_file)
390 def to_stderr ( self
, message
):
391 """Print message to stderr."""
392 assert isinstance ( message
, compat_str
)
393 if self
. params
. get ( 'logger' ):
394 self
. params
[ 'logger' ]. error ( message
)
396 message
= self
._ bidi
_ workaround
( message
)
397 output
= message
+ ' \n '
398 self
._ write
_ string
( output
, self
._ err
_ file
)
400 def to_console_title ( self
, message
):
401 if not self
. params
. get ( 'consoletitle' , False ):
403 if os
. name
== 'nt' and ctypes
. windll
. kernel32
. GetConsoleWindow ():
404 # c_wchar_p() might not be necessary if `message` is
405 # already of type unicode()
406 ctypes
. windll
. kernel32
. SetConsoleTitleW ( ctypes
. c_wchar_p ( message
))
407 elif 'TERM' in os
. environ
:
408 self
._ write
_ string
( ' \033 ]0; %s \007 ' % message
, self
._ screen
_ file
)
410 def save_console_title ( self
):
411 if not self
. params
. get ( 'consoletitle' , False ):
413 if 'TERM' in os
. environ
:
414 # Save the title on stack
415 self
._ write
_ string
( ' \033 [22;0t' , self
._ screen
_ file
)
417 def restore_console_title ( self
):
418 if not self
. params
. get ( 'consoletitle' , False ):
420 if 'TERM' in os
. environ
:
421 # Restore the title from stack
422 self
._ write
_ string
( ' \033 [23;0t' , self
._ screen
_ file
)
425 self
. save_console_title ()
428 def __exit__ ( self
, * args
):
429 self
. restore_console_title ()
431 if self
. params
. get ( 'cookiefile' ) is not None :
432 self
. cookiejar
. save ()
434 def trouble ( self
, message
= None , tb
= None ):
435 """Determine action to take when a download problem appears.
437 Depending on if the downloader has been configured to ignore
438 download errors or not, this method may throw an exception or
439 not when errors are found, after printing the message.
441 tb, if given, is additional traceback information.
443 if message
is not None :
444 self
. to_stderr ( message
)
445 if self
. params
. get ( 'verbose' ):
447 if sys
. exc_info ()[ 0 ]: # if .trouble has been called from an except block
449 if hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
450 tb
+= '' . join ( traceback
. format_exception (* sys
. exc_info ()[ 1 ]. exc_info
))
451 tb
+= compat_str ( traceback
. format_exc ())
453 tb_data
= traceback
. format_list ( traceback
. extract_stack ())
454 tb
= '' . join ( tb_data
)
456 if not self
. params
. get ( 'ignoreerrors' , False ):
457 if sys
. exc_info ()[ 0 ] and hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
458 exc_info
= sys
. exc_info ()[ 1 ]. exc_info
460 exc_info
= sys
. exc_info ()
461 raise DownloadError ( message
, exc_info
)
462 self
._ download
_ retcode
= 1
464 def report_warning ( self
, message
):
466 Print the message to stderr, it will be prefixed with 'WARNING:'
467 If stderr is a tty file the 'WARNING:' will be colored
469 if self
. params
. get ( 'logger' ) is not None :
470 self
. params
[ 'logger' ]. warning ( message
)
472 if self
. params
. get ( 'no_warnings' ):
474 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
475 _msg_header
= ' \033 [0;33mWARNING: \033 [0m'
477 _msg_header
= 'WARNING:'
478 warning_message
= ' %s %s ' % ( _msg_header
, message
)
479 self
. to_stderr ( warning_message
)
481 def report_error ( self
, message
, tb
= None ):
483 Do the same as trouble, but prefixes the message with 'ERROR:', colored
484 in red if stderr is a tty file.
486 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
487 _msg_header
= ' \033 [0;31mERROR: \033 [0m'
489 _msg_header
= 'ERROR:'
490 error_message
= ' %s %s ' % ( _msg_header
, message
)
491 self
. trouble ( error_message
, tb
)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The file name could not be encoded for output; report without it.
        self.to_screen('[download] The file has already been downloaded')
500 def prepare_filename ( self
, info_dict
):
501 """Generate the output filename."""
503 template_dict
= dict ( info_dict
)
505 template_dict
[ 'epoch' ] = int ( time
. time ())
506 autonumber_size
= self
. params
. get ( 'autonumber_size' )
507 if autonumber_size
is None :
509 autonumber_templ
= ' %0 ' + str ( autonumber_size
) + 'd'
510 template_dict
[ 'autonumber' ] = autonumber_templ
% self
._ num
_ downloads
511 if template_dict
. get ( 'playlist_index' ) is not None :
512 template_dict
[ 'playlist_index' ] = ' %0 *d' % ( len ( str ( template_dict
[ 'n_entries' ])), template_dict
[ 'playlist_index' ])
513 if template_dict
. get ( 'resolution' ) is None :
514 if template_dict
. get ( 'width' ) and template_dict
. get ( 'height' ):
515 template_dict
[ 'resolution' ] = ' %dx%d ' % ( template_dict
[ 'width' ], template_dict
[ 'height' ])
516 elif template_dict
. get ( 'height' ):
517 template_dict
[ 'resolution' ] = ' %s p' % template_dict
[ 'height' ]
518 elif template_dict
. get ( 'width' ):
519 template_dict
[ 'resolution' ] = '?x %d ' % template_dict
[ 'width' ]
521 sanitize
= lambda k
, v
: sanitize_filename (
523 restricted
= self
. params
. get ( 'restrictfilenames' ),
525 template_dict
= dict (( k
, sanitize ( k
, v
))
526 for k
, v
in template_dict
. items ()
528 template_dict
= collections
. defaultdict ( lambda : 'NA' , template_dict
)
530 outtmpl
= self
. params
. get ( 'outtmpl' , DEFAULT_OUTTMPL
)
531 tmpl
= compat_expanduser ( outtmpl
)
532 filename
= tmpl
% template_dict
534 except ValueError as err
:
535 self
. report_error ( 'Error in output template: ' + str ( err
) + ' (encoding: ' + repr ( preferredencoding ()) + ')' )
538 def _match_entry ( self
, info_dict
):
539 """ Returns None iff the file should be downloaded """
541 video_title
= info_dict
. get ( 'title' , info_dict
. get ( 'id' , 'video' ))
542 if 'title' in info_dict
:
543 # This can happen when we're just evaluating the playlist
544 title
= info_dict
[ 'title' ]
545 matchtitle
= self
. params
. get ( 'matchtitle' , False )
547 if not re
. search ( matchtitle
, title
, re
. IGNORECASE
):
548 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
549 rejecttitle
= self
. params
. get ( 'rejecttitle' , False )
551 if re
. search ( rejecttitle
, title
, re
. IGNORECASE
):
552 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
553 date
= info_dict
. get ( 'upload_date' , None )
555 dateRange
= self
. params
. get ( 'daterange' , DateRange ())
556 if date
not in dateRange
:
557 return ' %s upload date is not in range %s ' % ( date_from_str ( date
). isoformat (), dateRange
)
558 view_count
= info_dict
. get ( 'view_count' , None )
559 if view_count
is not None :
560 min_views
= self
. params
. get ( 'min_views' )
561 if min_views
is not None and view_count
< min_views
:
562 return 'Skipping %s , because it has not reached minimum view count ( %d / %d )' % ( video_title
, view_count
, min_views
)
563 max_views
= self
. params
. get ( 'max_views' )
564 if max_views
is not None and view_count
> max_views
:
565 return 'Skipping %s , because it has exceeded the maximum view count ( %d / %d )' % ( video_title
, view_count
, max_views
)
566 if age_restricted ( info_dict
. get ( 'age_limit' ), self
. params
. get ( 'age_limit' )):
567 return 'Skipping " %s " because it is age restricted' % title
568 if self
. in_download_archive ( info_dict
):
569 return ' %s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing

    info_dict is modified in place; keys it already contains keep their
    current values.
    '''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
578 def extract_info ( self
, url
, download
= True , ie_key
= None , extra_info
={},
581 Returns a list with a dictionary for each video we find.
582 If 'download', also downloads the videos.
583 extra_info is a dict containing the extra values to add to each result
587 ies
= [ self
. get_info_extractor ( ie_key
)]
592 if not ie
. suitable ( url
):
596 self
. report_warning ( 'The program functionality for this site has been marked as broken, '
597 'and will probably not work.' )
600 ie_result
= ie
. extract ( url
)
601 if ie_result
is None : # Finished already (backwards compatibility; listformats and friends should be moved here)
603 if isinstance ( ie_result
, list ):
604 # Backwards compatibility: old IE result format
606 '_type' : 'compat_list' ,
607 'entries' : ie_result
,
609 self
. add_default_extra_info ( ie_result
, ie
, url
)
611 return self
. process_ie_result ( ie_result
, download
, extra_info
)
614 except ExtractorError
as de
: # An error we somewhat expected
615 self
. report_error ( compat_str ( de
), de
. format_traceback ())
617 except MaxDownloadsReached
:
619 except Exception as e
:
620 if self
. params
. get ( 'ignoreerrors' , False ):
621 self
. report_error ( compat_str ( e
), tb
= compat_str ( traceback
. format_exc ()))
626 self
. report_error ( 'no suitable InfoExtractor for URL %s ' % url
)
628 def add_default_extra_info ( self
, ie_result
, ie
, url
):
629 self
. add_extra_info ( ie_result
, {
630 'extractor' : ie
. IE_NAME
,
632 'webpage_url_basename' : url_basename ( url
),
633 'extractor_key' : ie
. ie_key (),
636 def process_ie_result ( self
, ie_result
, download
= True , extra_info
={}):
638 Take the result of the ie(may be modified) and resolve all unresolved
639 references (URLs, playlist items).
641 It will also download the videos if 'download'.
642 Returns the resolved ie_result.
645 result_type
= ie_result
. get ( '_type' , 'video' )
647 if result_type
in ( 'url' , 'url_transparent' ):
648 extract_flat
= self
. params
. get ( 'extract_flat' , False )
649 if (( extract_flat
== 'in_playlist' and 'playlist' in extra_info
) or
650 extract_flat
is True ):
651 if self
. params
. get ( 'forcejson' , False ):
652 self
. to_stdout ( json
. dumps ( ie_result
))
655 if result_type
== 'video' :
656 self
. add_extra_info ( ie_result
, extra_info
)
657 return self
. process_video_result ( ie_result
, download
= download
)
658 elif result_type
== 'url' :
659 # We have to add extra_info to the results because it may be
660 # contained in a playlist
661 return self
. extract_info ( ie_result
[ 'url' ],
663 ie_key
= ie_result
. get ( 'ie_key' ),
664 extra_info
= extra_info
)
665 elif result_type
== 'url_transparent' :
666 # Use the information from the embedding page
667 info
= self
. extract_info (
668 ie_result
[ 'url' ], ie_key
= ie_result
. get ( 'ie_key' ),
669 extra_info
= extra_info
, download
= False , process
= False )
671 force_properties
= dict (
672 ( k
, v
) for k
, v
in ie_result
. items () if v
is not None )
673 for f
in ( '_type' , 'url' ):
674 if f
in force_properties
:
675 del force_properties
[ f
]
676 new_result
= info
. copy ()
677 new_result
. update ( force_properties
)
679 assert new_result
. get ( '_type' ) != 'url_transparent'
681 return self
. process_ie_result (
682 new_result
, download
= download
, extra_info
= extra_info
)
683 elif result_type
== 'playlist' or result_type
== 'multi_video' :
684 # We process each entry in the playlist
685 playlist
= ie_result
. get ( 'title' , None ) or ie_result
. get ( 'id' , None )
686 self
. to_screen ( '[download] Downloading playlist: %s ' % playlist
)
688 playlist_results
= []
690 playliststart
= self
. params
. get ( 'playliststart' , 1 ) - 1
691 playlistend
= self
. params
. get ( 'playlistend' , None )
692 # For backwards compatibility, interpret -1 as whole list
693 if playlistend
== - 1 :
696 ie_entries
= ie_result
[ 'entries' ]
697 if isinstance ( ie_entries
, list ):
698 n_all_entries
= len ( ie_entries
)
699 entries
= ie_entries
[ playliststart
: playlistend
]
700 n_entries
= len ( entries
)
702 "[ %s ] playlist %s : Collected %d video ids (downloading %d of them)" %
703 ( ie_result
[ 'extractor' ], playlist
, n_all_entries
, n_entries
))
704 elif isinstance ( ie_entries
, PagedList
):
705 entries
= ie_entries
. getslice (
706 playliststart
, playlistend
)
707 n_entries
= len ( entries
)
709 "[ %s ] playlist %s : Downloading %d videos" %
710 ( ie_result
[ 'extractor' ], playlist
, n_entries
))
712 entries
= list ( itertools
. islice (
713 ie_entries
, playliststart
, playlistend
))
714 n_entries
= len ( entries
)
716 "[ %s ] playlist %s : Downloading %d videos" %
717 ( ie_result
[ 'extractor' ], playlist
, n_entries
))
719 if self
. params
. get ( 'playlistreverse' , False ):
720 entries
= entries
[::- 1 ]
722 for i
, entry
in enumerate ( entries
, 1 ):
723 self
. to_screen ( '[download] Downloading video %s of %s ' % ( i
, n_entries
))
725 'n_entries' : n_entries
,
726 'playlist' : playlist
,
727 'playlist_id' : ie_result
. get ( 'id' ),
728 'playlist_title' : ie_result
. get ( 'title' ),
729 'playlist_index' : i
+ playliststart
,
730 'extractor' : ie_result
[ 'extractor' ],
731 'webpage_url' : ie_result
[ 'webpage_url' ],
732 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
733 'extractor_key' : ie_result
[ 'extractor_key' ],
736 reason
= self
._ match
_ entry
( entry
)
737 if reason
is not None :
738 self
. to_screen ( '[download] ' + reason
)
741 entry_result
= self
. process_ie_result ( entry
,
744 playlist_results
. append ( entry_result
)
745 ie_result
[ 'entries' ] = playlist_results
747 elif result_type
== 'compat_list' :
749 'Extractor %s returned a compat_list result. '
750 'It needs to be updated.' % ie_result
. get ( 'extractor' ))
756 'extractor' : ie_result
[ 'extractor' ],
757 'webpage_url' : ie_result
[ 'webpage_url' ],
758 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
759 'extractor_key' : ie_result
[ 'extractor_key' ],
763 ie_result
[ 'entries' ] = [
764 self
. process_ie_result ( _fixup ( r
), download
, extra_info
)
765 for r
in ie_result
[ 'entries' ]
769 raise Exception ( 'Invalid result type: %s ' % result_type
)
771 def select_format ( self
, format_spec
, available_formats
):
772 if format_spec
== 'best' or format_spec
is None :
773 return available_formats
[- 1 ]
774 elif format_spec
== 'worst' :
775 return available_formats
[ 0 ]
776 elif format_spec
== 'bestaudio' :
778 f
for f
in available_formats
779 if f
. get ( 'vcodec' ) == 'none' ]
781 return audio_formats
[- 1 ]
782 elif format_spec
== 'worstaudio' :
784 f
for f
in available_formats
785 if f
. get ( 'vcodec' ) == 'none' ]
787 return audio_formats
[ 0 ]
788 elif format_spec
== 'bestvideo' :
790 f
for f
in available_formats
791 if f
. get ( 'acodec' ) == 'none' ]
793 return video_formats
[- 1 ]
794 elif format_spec
== 'worstvideo' :
796 f
for f
in available_formats
797 if f
. get ( 'acodec' ) == 'none' ]
799 return video_formats
[ 0 ]
801 extensions
= [ 'mp4' , 'flv' , 'webm' , '3gp' , 'm4a' , 'mp3' , 'ogg' , 'aac' , 'wav' ]
802 if format_spec
in extensions
:
803 filter_f
= lambda f
: f
[ 'ext' ] == format_spec
805 filter_f
= lambda f
: f
[ 'format_id' ] == format_spec
806 matches
= list ( filter ( filter_f
, available_formats
))
811 def process_video_result ( self
, info_dict
, download
= True ):
812 assert info_dict
. get ( '_type' , 'video' ) == 'video'
814 if 'id' not in info_dict
:
815 raise ExtractorError ( 'Missing "id" field in extractor result' )
816 if 'title' not in info_dict
:
817 raise ExtractorError ( 'Missing "title" field in extractor result' )
819 if 'playlist' not in info_dict
:
820 # It isn't part of a playlist
821 info_dict
[ 'playlist' ] = None
822 info_dict
[ 'playlist_index' ] = None
824 thumbnails
= info_dict
. get ( 'thumbnails' )
826 thumbnails
. sort ( key
= lambda t
: (
827 t
. get ( 'width' ), t
. get ( 'height' ), t
. get ( 'url' )))
829 if 'width' in t
and 'height' in t
:
830 t
[ 'resolution' ] = ' %dx%d ' % ( t
[ 'width' ], t
[ 'height' ])
832 if thumbnails
and 'thumbnail' not in info_dict
:
833 info_dict
[ 'thumbnail' ] = thumbnails
[- 1 ][ 'url' ]
835 if 'display_id' not in info_dict
and 'id' in info_dict
:
836 info_dict
[ 'display_id' ] = info_dict
[ 'id' ]
838 if info_dict
. get ( 'upload_date' ) is None and info_dict
. get ( 'timestamp' ) is not None :
839 # Working around negative timestamps in Windows
840 # (see http://bugs.python.org/issue1646728)
841 if info_dict
[ 'timestamp' ] < 0 and os
. name
== 'nt' :
842 info_dict
[ 'timestamp' ] = 0
843 upload_date
= datetime
. datetime
. utcfromtimestamp (
844 info_dict
[ 'timestamp' ])
845 info_dict
[ 'upload_date' ] = upload_date
. strftime ( '%Y%m %d ' )
847 # This extractors handle format selection themselves
848 if info_dict
[ 'extractor' ] in [ 'Youku' ]:
850 self
. process_info ( info_dict
)
853 # We now pick which formats have to be downloaded
854 if info_dict
. get ( 'formats' ) is None :
855 # There's only one format available
856 formats
= [ info_dict
]
858 formats
= info_dict
[ 'formats' ]
861 raise ExtractorError ( 'No video formats found!' )
863 # We check that all the formats have the format and format_id fields
864 for i
, format
in enumerate ( formats
):
865 if 'url' not in format
:
866 raise ExtractorError ( 'Missing "url" key in result (index %d )' % i
)
868 if format
. get ( 'format_id' ) is None :
869 format
[ 'format_id' ] = compat_str ( i
)
870 if format
. get ( 'format' ) is None :
871 format
[ 'format' ] = ' {id} - {res}{note} ' . format (
872 id = format
[ 'format_id' ],
873 res
= self
. format_resolution ( format
),
874 note
= ' ( {0} )' . format ( format
[ 'format_note' ]) if format
. get ( 'format_note' ) is not None else '' ,
876 # Automatically determine file extension if missing
877 if 'ext' not in format
:
878 format
[ 'ext' ] = determine_ext ( format
[ 'url' ]). lower ()
880 format_limit
= self
. params
. get ( 'format_limit' , None )
882 formats
= list ( takewhile_inclusive (
883 lambda f
: f
[ 'format_id' ] != format_limit
, formats
886 # TODO Central sorting goes here
888 if formats
[ 0 ] is not info_dict
:
889 # only set the 'formats' fields if the original info_dict list them
890 # otherwise we end up with a circular reference, the first (and unique)
891 # element in the 'formats' field in info_dict is info_dict itself,
892 # wich can't be exported to json
893 info_dict
[ 'formats' ] = formats
894 if self
. params
. get ( 'listformats' , None ):
895 self
. list_formats ( info_dict
)
898 req_format
= self
. params
. get ( 'format' )
899 if req_format
is None :
901 formats_to_download
= []
902 # The -1 is for supporting YoutubeIE
903 if req_format
in ( '-1' , 'all' ):
904 formats_to_download
= formats
906 for rfstr
in req_format
. split ( ',' ):
907 # We can accept formats requested in the format: 34/5/best, we pick
908 # the first that is available, starting from left
909 req_formats
= rfstr
. split ( '/' )
910 for rf
in req_formats
:
911 if re
. match ( r
'.+?\+.+?' , rf
) is not None :
912 # Two formats have been requested like '137+139'
913 format_1
, format_2
= rf
. split ( '+' )
914 formats_info
= ( self
. select_format ( format_1
, formats
),
915 self
. select_format ( format_2
, formats
))
916 if all ( formats_info
):
917 # The first format must contain the video and the
919 if formats_info
[ 0 ]. get ( 'vcodec' ) == 'none' :
920 self
. report_error ( 'The first format must '
921 'contain the video, try using '
922 '"-f %s+%s "' % ( format_2
, format_1
))
925 formats_info
[ 0 ][ 'ext' ]
926 if self
. params
. get ( 'merge_output_format' ) is None
927 else self
. params
[ 'merge_output_format' ])
929 'requested_formats' : formats_info
,
931 'ext' : formats_info
[ 0 ][ 'ext' ],
932 'width' : formats_info
[ 0 ]. get ( 'width' ),
933 'height' : formats_info
[ 0 ]. get ( 'height' ),
934 'resolution' : formats_info
[ 0 ]. get ( 'resolution' ),
935 'fps' : formats_info
[ 0 ]. get ( 'fps' ),
936 'vcodec' : formats_info
[ 0 ]. get ( 'vcodec' ),
937 'vbr' : formats_info
[ 0 ]. get ( 'vbr' ),
938 'stretched_ratio' : formats_info
[ 0 ]. get ( 'stretched_ratio' ),
939 'acodec' : formats_info
[ 1 ]. get ( 'acodec' ),
940 'abr' : formats_info
[ 1 ]. get ( 'abr' ),
944 selected_format
= None
946 selected_format
= self
. select_format ( rf
, formats
)
947 if selected_format
is not None :
948 formats_to_download
. append ( selected_format
)
950 if not formats_to_download
:
951 raise ExtractorError ( 'requested format not available' ,
955 if len ( formats_to_download
) > 1 :
956 self
. to_screen ( '[info] %s : downloading video in %s formats' % ( info_dict
[ 'id' ], len ( formats_to_download
)))
957 for format
in formats_to_download
:
958 new_info
= dict ( info_dict
)
959 new_info
. update ( format
)
960 self
. process_info ( new_info
)
961 # We update the info dict with the best quality format (backwards compatibility)
962 info_dict
. update ( formats_to_download
[- 1 ])
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Normalises the title/format fields of info_dict in place, performs the
    "forced printings" requested through self.params, and, unless running
    in simulate mode, writes the requested side files (description,
    annotations, subtitles, info JSON, thumbnail), downloads the media,
    runs the post-processors and records the video in the download archive.

    Returns None.  Recoverable problems are reported through
    self.report_error / self.report_warning and end the processing of this
    entry early.

    Raises MaxDownloadsReached when the configured 'max_downloads' limit
    has already been hit, and UnavailableVideoError when the download
    fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around; the (possibly shortened) 'title'
    # is what ends up in file names.
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing key or a non-text value: nothing usable to write
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    # Encode the name exactly like the existence check above
                    # so both refer to the same path on disk.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal for the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Pick a downloader for this format and run it with all
                # registered progress hooks attached.
                fd = get_suitable_downloader(info)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)
            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                if not merger._executable:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged')
                else:
                    postprocessors = [merger]
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    new_info.update(f)
                    fname = self.prepare_filename(new_info)
                    fname = prepend_extension(fname, 'f%s' % f['format_id'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy == 'ignore'

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs are given but the output
    template contains no '%' field (and 'max_downloads' is not 1), since
    every download would then be written to the same file.
    MaxDownloadsReached is reported on screen and re-raised.

    Returns self._download_retcode.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using a previously saved info JSON file.

    The file is read as UTF-8 JSON and handed to process_ie_result with
    download=True.  If that raises DownloadError and the info carries a
    'webpage_url', a warning is emitted and the download is retried from
    that URL; without a 'webpage_url' the DownloadError is re-raised.

    Returns self._download_retcode (or the result of self.download on the
    fallback path).
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is made of the entry-specific post-processors stored under
    ie_info['__postprocessors'] (if any) followed by the ones registered
    on this object (self._pps).  Each post-processor receives the info
    dict returned by the previous one.  When a post-processor asks for the
    input file not to be kept and the 'keepvideo' option is off, the file
    it was given is deleted.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        keep_video = None
        # Remember the input path: pp.run may change info['filepath']
        old_filename = info['filepath']
        try:
            keep_video_wish, info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1259 def _make_archive_id ( self
, info_dict
):
1260 # Future-proof against any change in case
1261 # and backwards compatibility with prior versions
1262 extractor
= info_dict
. get ( 'extractor_key' )
1263 if extractor
is None :
1264 if 'id' in info_dict
:
1265 extractor
= info_dict
. get ( 'ie_key' ) # key in a playlist
1266 if extractor
is None :
1267 return None # Incomplete video information
1268 return extractor
. lower () + ' ' + info_dict
[ 'id' ]
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is listed in the archive file.

    Returns False when no 'download_archive' file is configured, when no
    archive id can be built for info_dict, when the archive file does not
    exist yet, or when the id is not found in it.  Any IOError other than
    ENOENT is re-raised.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive file.

    Does nothing when no 'download_archive' file is configured.  The id
    is written on its own line, in UTF-8, with the file opened through
    locked_file in append mode.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # Only fully identified videos are expected to reach this point
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    - 'audio only' when the format has no video ('vcodec' == 'none');
    - the explicit 'resolution' field when present;
    - '<width>x<height>', '<height>p' or '<width>x?' depending on which
      of the dimensions are known;
    - default when nothing is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Only the width is known: it goes in the width slot, the unknown
        # height is shown as '?'.
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1315 def _format_note ( self
, fdict
):
1317 if fdict
. get ( 'ext' ) in [ 'f4f' , 'f4m' ]:
1318 res
+= '(unsupported) '
1319 if fdict
. get ( 'format_note' ) is not None :
1320 res
+= fdict
[ 'format_note' ] + ' '
1321 if fdict
. get ( 'tbr' ) is not None :
1322 res
+= '%4dk ' % fdict
[ 'tbr' ]
1323 if fdict
. get ( 'container' ) is not None :
1326 res
+= ' %s container' % fdict
[ 'container' ]
1327 if ( fdict
. get ( 'vcodec' ) is not None and
1328 fdict
. get ( 'vcodec' ) != 'none' ):
1331 res
+= fdict
[ 'vcodec' ]
1332 if fdict
. get ( 'vbr' ) is not None :
1334 elif fdict
. get ( 'vbr' ) is not None and fdict
. get ( 'abr' ) is not None :
1336 if fdict
. get ( 'vbr' ) is not None :
1337 res
+= '%4dk' % fdict
[ 'vbr' ]
1338 if fdict
. get ( 'fps' ) is not None :
1339 res
+= ', %sf ps' % fdict
[ 'fps' ]
1340 if fdict
. get ( 'acodec' ) is not None :
1343 if fdict
[ 'acodec' ] == 'none' :
1346 res
+= ' %- 5s' % fdict
[ 'acodec' ]
1347 elif fdict
. get ( 'abr' ) is not None :
1351 if fdict
. get ( 'abr' ) is not None :
1352 res
+= '@%3dk' % fdict
[ 'abr' ]
1353 if fdict
. get ( 'asr' ) is not None :
1354 res
+= ' (%5dHz)' % fdict
[ 'asr' ]
1355 if fdict
. get ( 'filesize' ) is not None :
1358 res
+= format_bytes ( fdict
[ 'filesize' ])
1359 elif fdict
. get ( 'filesize_approx' ) is not None :
1362 res
+= '~' + format_bytes ( fdict
[ 'filesize_approx' ])
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    The formats come from info_dict['formats'] (or info_dict itself when
    that key is absent).  Formats whose 'preference' is below -1000 are
    not listed.  When more than one format is listed, the first row is
    tagged '(worst)' and the last one '(best)'.
    """
    def line(format, idlen=20):
        # '*' takes the width of the id column from the argument list
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    formats_s = [line(f, idlen) for f in listed]
    # Tag the rows that are actually shown, so that hidden formats can
    # neither shift the labels nor leave us indexing an empty list.
    if len(listed) > 1:
        formats_s[0] += (' ' if self._format_note(listed[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(listed[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download

    req is either a URL string or a request object.  The URL is passed
    through escape_url first; if that changes it, the escaped URL is used
    instead (for request objects a new Request is built carrying over
    data, headers, origin_req_host and unverifiable).  The request is
    then opened through self._opener with self._socket_timeout.
    """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the '[debug]' header when the 'verbose' option is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD
    (when it can be determined), the Python version and platform, the
    versions of the external programs and the proxy map.  With the
    'call_home' option it additionally prints the public IP address and
    warns when a newer version is available.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # The git probe is best-effort (git missing, not a checkout, ...).
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear only exists on Python 2
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the URL opener used for all HTTP(S) requests.

    Sets self._socket_timeout (600 when the 'socket_timeout' option is
    unset), self.cookiejar (loaded from the 'cookiefile' option when that
    file is readable) and self._opener, an opener combining the HTTPS
    handler, the proxy handler, the cookie processor and YoutubeDLHandler.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicitly empty proxy option means "use no proxy at all"
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode s with the encoding returned by self.get_encoding().

    Byte strings are returned unchanged.  When encoding fails, the
    UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
1529 def get_encoding ( self
):
1530 encoding
= self
. params
. get ( 'encoding' )
1531 if encoding
is None :
1532 encoding
= preferredencoding ()