]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
, unicode_literals
28 compat_urllib_request
,
49 UnavailableVideoError
,
56 from . extractor
import get_info_extractor
, gen_extractors
57 from . downloader
import get_suitable_downloader
58 from . postprocessor
import FFmpegMergerPP
59 from . version
import __version__
62 class YoutubeDL ( object ):
65 YoutubeDL objects are the ones responsible for downloading the
66 actual video file and writing it to disk if the user has requested
67 it, among some other tasks. In most cases there should be one per
68 program. As, given a video URL, the downloader doesn't know how to
69 extract all the needed information, task that InfoExtractors do, it
70 has to pass the URL to one of them.
72 For this, YoutubeDL objects have a method that allows
73 InfoExtractors to be registered in a given order. When it is passed
74 a URL, the YoutubeDL object hands it to the first InfoExtractor it
75 finds that reports being able to handle it. The InfoExtractor extracts
76 all the information about the video or videos the URL refers to, and
77 YoutubeDL processes the extracted information, possibly using a File
78 Downloader to download the video.
80 YoutubeDL objects accept a lot of parameters. In order not to saturate
81 the object constructor with arguments, it receives a dictionary of
82 options instead. These options are available through the params
83 attribute for the InfoExtractors to use. The YoutubeDL also
84 registers itself as the downloader in charge for the InfoExtractors
85 that are added to it, so this is a "mutual registration".
89 username: Username for authentication purposes.
90 password: Password for authentication purposes.
91 videopassword: Password for accessing a video.
92 usenetrc: Use netrc for authentication instead.
93 verbose: Print additional info to stdout.
94 quiet: Do not print messages to stdout.
95 forceurl: Force printing final URL.
96 forcetitle: Force printing title.
97 forceid: Force printing ID.
98 forcethumbnail: Force printing thumbnail URL.
99 forcedescription: Force printing description.
100 forcefilename: Force printing final filename.
101 forceduration: Force printing duration.
102 forcejson: Force printing info_dict as JSON.
103 simulate: Do not download the video files.
104 format: Video format code.
105 format_limit: Highest quality format to try.
106 outtmpl: Template for output names.
107 restrictfilenames: Do not allow "&" and spaces in file names
108 ignoreerrors: Do not stop on download errors.
109 nooverwrites: Prevent overwriting files.
110 playliststart: Playlist item to start at.
111 playlistend: Playlist item to end at.
112 matchtitle: Download only matching titles.
113 rejecttitle: Reject downloads for matching titles.
114 logger: Log messages to a logging.Logger instance.
115 logtostderr: Log messages to stderr instead of stdout.
116 writedescription: Write the video description to a .description file
117 writeinfojson: Write the video metadata to a .info.json file
118 writeannotations: Write the video annotations to a .annotations.xml file
119 writethumbnail: Write the thumbnail image to a file
120 writesubtitles: Write the video subtitles to a file
121 writeautomaticsub: Write the automatic subtitles to a file
122 allsubtitles: Downloads all the subtitles of the video
123 (requires writesubtitles or writeautomaticsub)
124 listsubtitles: Lists all available subtitles for the video
125 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
126 subtitleslangs: List of languages of the subtitles to download
127 keepvideo: Keep the video file after post-processing
128 daterange: A DateRange object, download only if the upload_date is in the range.
129 skip_download: Skip the actual download of the video file
130 cachedir: Location of the cache files in the filesystem.
131 None to disable filesystem cache.
132 noplaylist: Download single video instead of a playlist if in doubt.
133 age_limit: An integer representing the user's age in years.
134 Unsuitable videos for the given age are skipped.
135 min_views: An integer representing the minimum view count the video
136 must have in order to not be skipped.
137 Videos without view count information are always
138 downloaded. None for no limit.
139 max_views: An integer representing the maximum view count.
140 Videos that are more popular than that are not
142 Videos without view count information are always
143 downloaded. None for no limit.
144 download_archive: File name of a file where all downloads are recorded.
145 Videos already present in the file are not downloaded
147 cookiefile: File name where cookies should be read from and dumped to.
148 nocheckcertificate:Do not verify SSL certificates
149 proxy: URL of the proxy server to use
150 socket_timeout: Time to wait for unresponsive hosts, in seconds
151 bidi_workaround: Work around buggy terminals without bidirectional text
152 support, using fribidi
153 debug_printtraffic:Print out sent and received HTTP traffic
155 The following parameters are not used by YoutubeDL itself, they are used by
157 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
158 noresizebuffer, retries, continuedl, noprogress, consoletitle
160 The following options are used by the post processors:
161 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
162 otherwise prefer avconv.
168 _download_retcode
= None
169 _num_downloads
= None
172 def __init__ ( self
, params
= None ):
173 """Create a FileDownloader object with the given options."""
177 self
._ ies
_ instances
= {}
179 self
._ progress
_ hooks
= []
180 self
._ download
_ retcode
= 0
181 self
._ num
_ downloads
= 0
182 self
._ screen
_ file
= [ sys
. stdout
, sys
. stderr
][ params
. get ( 'logtostderr' , False )]
183 self
._ err
_ file
= sys
. stderr
186 if params
. get ( 'bidi_workaround' , False ):
189 master
, slave
= pty
. openpty ()
190 width
= get_term_width ()
194 width_args
= [ '-w' , str ( width
)]
196 stdin
= subprocess
. PIPE
,
198 stderr
= self
._ err
_ file
)
200 self
._ output
_ process
= subprocess
. Popen (
201 [ 'bidiv' ] + width_args
, ** sp_kwargs
204 self
._ output
_ process
= subprocess
. Popen (
205 [ 'fribidi' , '-c' , 'UTF-8' ] + width_args
, ** sp_kwargs
)
206 self
._ output
_ channel
= os
. fdopen ( master
, 'rb' )
207 except OSError as ose
:
209 self
. report_warning ( 'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.' )
213 if ( sys
. version_info
>= ( 3 ,) and sys
. platform
!= 'win32' and
214 sys
. getfilesystemencoding () in [ 'ascii' , 'ANSI_X3.4-1968' ]
215 and not params
[ 'restrictfilenames' ]):
216 # On Python 3, the Unicode filesystem API will throw errors (#1474)
218 'Assuming --restrict-filenames since file system encoding '
219 'cannot encode all charactes. '
220 'Set the LC_ALL environment variable to fix this.' )
221 self
. params
[ 'restrictfilenames' ] = True
223 if ' %(stitle)s ' in self
. params
. get ( 'outtmpl' , '' ):
224 self
. report_warning ( ' %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.' )
228 def add_info_extractor ( self
, ie
):
229 """Add an InfoExtractor object to the end of the list."""
231 self
._ ies
_ instances
[ ie
. ie_key ()] = ie
232 ie
. set_downloader ( self
)
234 def get_info_extractor ( self
, ie_key
):
236 Get an instance of an IE with name ie_key, it will try to get one from
237 the _ies list, if there's no instance it will create a new one and add
238 it to the extractor list.
240 ie
= self
._ ies
_ instances
. get ( ie_key
)
242 ie
= get_info_extractor ( ie_key
)()
243 self
. add_info_extractor ( ie
)
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are yielded, via add_info_extractor().
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
253 def add_post_processor ( self
, pp
):
254 """Add a PostProcessor object to the end of the chain."""
256 pp
. set_downloader ( self
)
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
262 def _bidi_workaround ( self
, message
):
263 if not hasattr ( self
, '_output_channel' ):
266 assert hasattr ( self
, '_output_process' )
267 assert type ( message
) == type ( '' )
268 line_count
= message
. count ( ' \n ' ) + 1
269 self
._ output
_ process
. stdin
. write (( message
+ ' \n ' ). encode ( 'utf-8' ))
270 self
._ output
_ process
. stdin
. flush ()
271 res
= '' . join ( self
._ output
_ channel
. readline (). decode ( 'utf-8' )
272 for _
in range ( line_count
))
273 return res
[:- len ( ' \n ' )]
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout() with the quiet check enabled."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the configured logger, or write it to the screen file.

    If the 'logger' param is set, the message is passed to its debug()
    method and nothing else is written. Otherwise the message is run
    through _bidi_workaround() and written to self._screen_file, followed
    by a newline unless skip_eol is true. When check_quiet is true, nothing
    is written if the 'quiet' param is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    # A conditional expression instead of indexing a two-item list with
    # skip_eol, so any truthy/falsy value is accepted, not only bool/0/1.
    terminator = '' if skip_eol else '\n'
    write_string(message + terminator, self._screen_file)
290 def to_stderr ( self
, message
):
291 """Print message to stderr."""
292 assert type ( message
) == type ( '' )
293 if self
. params
. get ( 'logger' ):
294 self
. params
[ 'logger' ]. error ( message
)
296 message
= self
._ bidi
_ workaround
( message
)
297 output
= message
+ ' \n '
298 write_string ( output
, self
._ err
_ file
)
300 def to_console_title ( self
, message
):
301 if not self
. params
. get ( 'consoletitle' , False ):
303 if os
. name
== 'nt' and ctypes
. windll
. kernel32
. GetConsoleWindow ():
304 # c_wchar_p() might not be necessary if `message` is
305 # already of type unicode()
306 ctypes
. windll
. kernel32
. SetConsoleTitleW ( ctypes
. c_wchar_p ( message
))
307 elif 'TERM' in os
. environ
:
308 write_string ( ' \033 ]0; %s \007 ' % message
, self
._ screen
_ file
)
310 def save_console_title ( self
):
311 if not self
. params
. get ( 'consoletitle' , False ):
313 if 'TERM' in os
. environ
:
314 # Save the title on stack
315 write_string ( ' \033 [22;0t' , self
._ screen
_ file
)
317 def restore_console_title ( self
):
318 if not self
. params
. get ( 'consoletitle' , False ):
320 if 'TERM' in os
. environ
:
321 # Restore the title from stack
322 write_string ( ' \033 [23;0t' , self
._ screen
_ file
)
325 self
. save_console_title ()
def __exit__(self, *args):
    """Restore the console title, then save the cookie jar when a
    'cookiefile' param is configured."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
334 def trouble ( self
, message
= None , tb
= None ):
335 """Determine action to take when a download problem appears.
337 Depending on if the downloader has been configured to ignore
338 download errors or not, this method may throw an exception or
339 not when errors are found, after printing the message.
341 tb, if given, is additional traceback information.
343 if message
is not None :
344 self
. to_stderr ( message
)
345 if self
. params
. get ( 'verbose' ):
347 if sys
. exc_info ()[ 0 ]: # if .trouble has been called from an except block
349 if hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
350 tb
+= '' . join ( traceback
. format_exception (* sys
. exc_info ()[ 1 ]. exc_info
))
351 tb
+= compat_str ( traceback
. format_exc ())
353 tb_data
= traceback
. format_list ( traceback
. extract_stack ())
354 tb
= '' . join ( tb_data
)
356 if not self
. params
. get ( 'ignoreerrors' , False ):
357 if sys
. exc_info ()[ 0 ] and hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
358 exc_info
= sys
. exc_info ()[ 1 ]. exc_info
360 exc_info
= sys
. exc_info ()
361 raise DownloadError ( message
, exc_info
)
362 self
._ download
_ retcode
= 1
364 def report_warning ( self
, message
):
366 Print the message to stderr, it will be prefixed with 'WARNING:'
367 If stderr is a tty file the 'WARNING:' will be colored
369 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
370 _msg_header
= ' \033 [0;33mWARNING: \033 [0m'
372 _msg_header
= 'WARNING:'
373 warning_message
= ' %s %s ' % ( _msg_header
, message
)
374 self
. to_stderr ( warning_message
)
376 def report_error ( self
, message
, tb
= None ):
378 Do the same as trouble, but prefixes the message with 'ERROR:', colored
379 in red if stderr is a tty file.
381 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
382 _msg_header
= ' \033 [0;31mERROR: \033 [0m'
384 _msg_header
= 'ERROR:'
385 error_message
= ' %s %s ' % ( _msg_header
, message
)
386 self
. trouble ( error_message
, tb
)
388 def report_file_already_downloaded ( self
, file_name
):
389 """Report file has already been fully downloaded."""
391 self
. to_screen ( '[download] %s has already been downloaded' % file_name
)
392 except UnicodeEncodeError :
393 self
. to_screen ( '[download] The file has already been downloaded' )
def increment_downloads(self):
    """Advance by one the ordinal used to number each file."""
    self._num_downloads = self._num_downloads + 1
399 def prepare_filename ( self
, info_dict
):
400 """Generate the output filename."""
402 template_dict
= dict ( info_dict
)
404 template_dict
[ 'epoch' ] = int ( time
. time ())
405 autonumber_size
= self
. params
. get ( 'autonumber_size' )
406 if autonumber_size
is None :
408 autonumber_templ
= ' %0 ' + str ( autonumber_size
) + 'd'
409 template_dict
[ 'autonumber' ] = autonumber_templ
% self
._ num
_ downloads
410 if template_dict
. get ( 'playlist_index' ) is not None :
411 template_dict
[ 'playlist_index' ] = ' %0 5d' % template_dict
[ 'playlist_index' ]
413 sanitize
= lambda k
, v
: sanitize_filename (
415 restricted
= self
. params
. get ( 'restrictfilenames' ),
417 template_dict
= dict (( k
, sanitize ( k
, v
))
418 for k
, v
in template_dict
. items ()
420 template_dict
= collections
. defaultdict ( lambda : 'NA' , template_dict
)
422 tmpl
= os
. path
. expanduser ( self
. params
[ 'outtmpl' ])
423 filename
= tmpl
% template_dict
425 except ValueError as err
:
426 self
. report_error ( 'Error in output template: ' + str ( err
) + ' (encoding: ' + repr ( preferredencoding ()) + ')' )
429 def _match_entry ( self
, info_dict
):
430 """ Returns None iff the file should be downloaded """
432 video_title
= info_dict
. get ( 'title' , info_dict
. get ( 'id' , 'video' ))
433 if 'title' in info_dict
:
434 # This can happen when we're just evaluating the playlist
435 title
= info_dict
[ 'title' ]
436 matchtitle
= self
. params
. get ( 'matchtitle' , False )
438 if not re
. search ( matchtitle
, title
, re
. IGNORECASE
):
439 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
440 rejecttitle
= self
. params
. get ( 'rejecttitle' , False )
442 if re
. search ( rejecttitle
, title
, re
. IGNORECASE
):
443 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
444 date
= info_dict
. get ( 'upload_date' , None )
446 dateRange
= self
. params
. get ( 'daterange' , DateRange ())
447 if date
not in dateRange
:
448 return ' %s upload date is not in range %s ' % ( date_from_str ( date
). isoformat (), dateRange
)
449 view_count
= info_dict
. get ( 'view_count' , None )
450 if view_count
is not None :
451 min_views
= self
. params
. get ( 'min_views' )
452 if min_views
is not None and view_count
< min_views
:
453 return 'Skipping %s , because it has not reached minimum view count ( %d / %d )' % ( video_title
, view_count
, min_views
)
454 max_views
= self
. params
. get ( 'max_views' )
455 if max_views
is not None and view_count
> max_views
:
456 return 'Skipping %s , because it has exceeded the maximum view count ( %d / %d )' % ( video_title
, view_count
, max_views
)
457 age_limit
= self
. params
. get ( 'age_limit' )
458 if age_limit
is not None :
459 if age_limit
< info_dict
. get ( 'age_limit' , 0 ):
460 return 'Skipping "' + title
+ '" because it is age restricted'
461 if self
. in_download_archive ( info_dict
):
462 return ' %s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict each key of extra_info that it does not have yet'''
    # NOTE(review): takes no self although callers use self.add_extra_info(...);
    # presumably decorated with @staticmethod on a line not visible here - confirm.
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
471 def extract_info ( self
, url
, download
= True , ie_key
= None , extra_info
={},
474 Returns a list with a dictionary for each video we find.
475 If 'download', also downloads the videos.
476 extra_info is a dict containing the extra values to add to each result
480 ies
= [ self
. get_info_extractor ( ie_key
)]
485 if not ie
. suitable ( url
):
489 self
. report_warning ( 'The program functionality for this site has been marked as broken, '
490 'and will probably not work.' )
493 ie_result
= ie
. extract ( url
)
494 if ie_result
is None : # Finished already (backwards compatibility; listformats and friends should be moved here)
496 if isinstance ( ie_result
, list ):
497 # Backwards compatibility: old IE result format
499 '_type' : 'compat_list' ,
500 'entries' : ie_result
,
502 self
. add_extra_info ( ie_result
,
504 'extractor' : ie
. IE_NAME
,
506 'webpage_url_basename' : url_basename ( url
),
507 'extractor_key' : ie
. ie_key (),
510 return self
. process_ie_result ( ie_result
, download
, extra_info
)
513 except ExtractorError
as de
: # An error we somewhat expected
514 self
. report_error ( compat_str ( de
), de
. format_traceback ())
516 except Exception as e
:
517 if self
. params
. get ( 'ignoreerrors' , False ):
518 self
. report_error ( compat_str ( e
), tb
= compat_str ( traceback
. format_exc ()))
523 self
. report_error ( 'no suitable InfoExtractor: %s ' % url
)
525 def process_ie_result ( self
, ie_result
, download
= True , extra_info
={}):
527 Take the result of the ie(may be modified) and resolve all unresolved
528 references (URLs, playlist items).
530 It will also download the videos if 'download'.
531 Returns the resolved ie_result.
534 result_type
= ie_result
. get ( '_type' , 'video' ) # If not given we suppose it's a video, support the default old system
535 if result_type
== 'video' :
536 self
. add_extra_info ( ie_result
, extra_info
)
537 return self
. process_video_result ( ie_result
, download
= download
)
538 elif result_type
== 'url' :
539 # We have to add extra_info to the results because it may be
540 # contained in a playlist
541 return self
. extract_info ( ie_result
[ 'url' ],
543 ie_key
= ie_result
. get ( 'ie_key' ),
544 extra_info
= extra_info
)
545 elif result_type
== 'url_transparent' :
546 # Use the information from the embedding page
547 info
= self
. extract_info (
548 ie_result
[ 'url' ], ie_key
= ie_result
. get ( 'ie_key' ),
549 extra_info
= extra_info
, download
= False , process
= False )
551 def make_result ( embedded_info
):
552 new_result
= ie_result
. copy ()
553 for f
in ( '_type' , 'url' , 'ext' , 'player_url' , 'formats' ,
554 'entries' , 'ie_key' , 'duration' ,
555 'subtitles' , 'annotations' , 'format' ,
556 'thumbnail' , 'thumbnails' ):
559 if f
in embedded_info
:
560 new_result
[ f
] = embedded_info
[ f
]
562 new_result
= make_result ( info
)
564 assert new_result
. get ( '_type' ) != 'url_transparent'
565 if new_result
. get ( '_type' ) == 'compat_list' :
566 new_result
[ 'entries' ] = [
567 make_result ( e
) for e
in new_result
[ 'entries' ]]
569 return self
. process_ie_result (
570 new_result
, download
= download
, extra_info
= extra_info
)
571 elif result_type
== 'playlist' :
572 # We process each entry in the playlist
573 playlist
= ie_result
. get ( 'title' , None ) or ie_result
. get ( 'id' , None )
574 self
. to_screen ( '[download] Downloading playlist: %s ' % playlist
)
576 playlist_results
= []
578 n_all_entries
= len ( ie_result
[ 'entries' ])
579 playliststart
= self
. params
. get ( 'playliststart' , 1 ) - 1
580 playlistend
= self
. params
. get ( 'playlistend' , None )
581 # For backwards compatibility, interpret -1 as whole list
582 if playlistend
== - 1 :
585 entries
= ie_result
[ 'entries' ][ playliststart
: playlistend
]
586 n_entries
= len ( entries
)
589 "[ %s ] playlist ' %s ': Collected %d video ids (downloading %d of them)" %
590 ( ie_result
[ 'extractor' ], playlist
, n_all_entries
, n_entries
))
592 for i
, entry
in enumerate ( entries
, 1 ):
593 self
. to_screen ( '[download] Downloading video # %s of %s ' % ( i
, n_entries
))
595 'playlist' : playlist
,
596 'playlist_index' : i
+ playliststart
,
597 'extractor' : ie_result
[ 'extractor' ],
598 'webpage_url' : ie_result
[ 'webpage_url' ],
599 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
600 'extractor_key' : ie_result
[ 'extractor_key' ],
603 reason
= self
._ match
_ entry
( entry
)
604 if reason
is not None :
605 self
. to_screen ( '[download] ' + reason
)
608 entry_result
= self
. process_ie_result ( entry
,
611 playlist_results
. append ( entry_result
)
612 ie_result
[ 'entries' ] = playlist_results
614 elif result_type
== 'compat_list' :
616 self
. add_extra_info ( r
,
618 'extractor' : ie_result
[ 'extractor' ],
619 'webpage_url' : ie_result
[ 'webpage_url' ],
620 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
621 'extractor_key' : ie_result
[ 'extractor_key' ],
624 ie_result
[ 'entries' ] = [
625 self
. process_ie_result ( _fixup ( r
), download
, extra_info
)
626 for r
in ie_result
[ 'entries' ]
630 raise Exception ( 'Invalid result type: %s ' % result_type
)
632 def select_format ( self
, format_spec
, available_formats
):
633 if format_spec
== 'best' or format_spec
is None :
634 return available_formats
[- 1 ]
635 elif format_spec
== 'worst' :
636 return available_formats
[ 0 ]
638 extensions
= [ 'mp4' , 'flv' , 'webm' , '3gp' ]
639 if format_spec
in extensions
:
640 filter_f
= lambda f
: f
[ 'ext' ] == format_spec
642 filter_f
= lambda f
: f
[ 'format_id' ] == format_spec
643 matches
= list ( filter ( filter_f
, available_formats
))
648 def process_video_result ( self
, info_dict
, download
= True ):
649 assert info_dict
. get ( '_type' , 'video' ) == 'video'
651 if 'playlist' not in info_dict
:
652 # It isn't part of a playlist
653 info_dict
[ 'playlist' ] = None
654 info_dict
[ 'playlist_index' ] = None
656 # This extractors handle format selection themselves
657 if info_dict
[ 'extractor' ] in [ 'Youku' ]:
659 self
. process_info ( info_dict
)
662 # We now pick which formats have to be downloaded
663 if info_dict
. get ( 'formats' ) is None :
664 # There's only one format available
665 formats
= [ info_dict
]
667 formats
= info_dict
[ 'formats' ]
669 # We check that all the formats have the format and format_id fields
670 for ( i
, format
) in enumerate ( formats
):
671 if format
. get ( 'format_id' ) is None :
672 format
[ 'format_id' ] = compat_str ( i
)
673 if format
. get ( 'format' ) is None :
674 format
[ 'format' ] = ' {id} - {res}{note} ' . format (
675 id = format
[ 'format_id' ],
676 res
= self
. format_resolution ( format
),
677 note
= ' ( {0} )' . format ( format
[ 'format_note' ]) if format
. get ( 'format_note' ) is not None else '' ,
679 # Automatically determine file extension if missing
680 if 'ext' not in format
:
681 format
[ 'ext' ] = determine_ext ( format
[ 'url' ])
683 format_limit
= self
. params
. get ( 'format_limit' , None )
685 formats
= list ( takewhile_inclusive (
686 lambda f
: f
[ 'format_id' ] != format_limit
, formats
689 # TODO Central sorting goes here
691 if formats
[ 0 ] is not info_dict
:
692 # only set the 'formats' fields if the original info_dict list them
693 # otherwise we end up with a circular reference, the first (and unique)
694 # element in the 'formats' field in info_dict is info_dict itself,
695 # which can't be exported to json
696 info_dict
[ 'formats' ] = formats
697 if self
. params
. get ( 'listformats' , None ):
698 self
. list_formats ( info_dict
)
701 req_format
= self
. params
. get ( 'format' , 'best' )
702 if req_format
is None :
704 formats_to_download
= []
705 # The -1 is for supporting YoutubeIE
706 if req_format
in ( '-1' , 'all' ):
707 formats_to_download
= formats
709 # We can accept formats requested in the format: 34/5/best, we pick
710 # the first that is available, starting from left
711 req_formats
= req_format
. split ( '/' )
712 for rf
in req_formats
:
713 if re
. match ( r
'.+?\+.+?' , rf
) is not None :
714 # Two formats have been requested like '137+139'
715 format_1
, format_2
= rf
. split ( '+' )
716 formats_info
= ( self
. select_format ( format_1
, formats
),
717 self
. select_format ( format_2
, formats
))
718 if all ( formats_info
):
720 'requested_formats' : formats_info
,
722 'ext' : formats_info
[ 0 ][ 'ext' ],
725 selected_format
= None
727 selected_format
= self
. select_format ( rf
, formats
)
728 if selected_format
is not None :
729 formats_to_download
= [ selected_format
]
731 if not formats_to_download
:
732 raise ExtractorError ( 'requested format not available' ,
736 if len ( formats_to_download
) > 1 :
737 self
. to_screen ( '[info] %s : downloading video in %s formats' % ( info_dict
[ 'id' ], len ( formats_to_download
)))
738 for format
in formats_to_download
:
739 new_info
= dict ( info_dict
)
740 new_info
. update ( format
)
741 self
. process_info ( new_info
)
742 # We update the info dict with the best quality format (backwards compatibility)
743 info_dict
. update ( formats_to_download
[- 1 ])
746 def process_info ( self
, info_dict
):
747 """Process a single resolved IE result."""
749 assert info_dict
. get ( '_type' , 'video' ) == 'video'
750 #We increment the download the download count here to match the previous behaviour.
751 self
. increment_downloads ()
753 info_dict
[ 'fulltitle' ] = info_dict
[ 'title' ]
754 if len ( info_dict
[ 'title' ]) > 200 :
755 info_dict
[ 'title' ] = info_dict
[ 'title' ][: 197 ] + '...'
757 # Keep for backwards compatibility
758 info_dict
[ 'stitle' ] = info_dict
[ 'title' ]
760 if not 'format' in info_dict
:
761 info_dict
[ 'format' ] = info_dict
[ 'ext' ]
763 reason
= self
._ match
_ entry
( info_dict
)
764 if reason
is not None :
765 self
. to_screen ( '[download] ' + reason
)
768 max_downloads
= self
. params
. get ( 'max_downloads' )
769 if max_downloads
is not None :
770 if self
._ num
_ downloads
> int ( max_downloads
):
771 raise MaxDownloadsReached ()
773 filename
= self
. prepare_filename ( info_dict
)
776 if self
. params
. get ( 'forcetitle' , False ):
777 self
. to_stdout ( info_dict
[ 'fulltitle' ])
778 if self
. params
. get ( 'forceid' , False ):
779 self
. to_stdout ( info_dict
[ 'id' ])
780 if self
. params
. get ( 'forceurl' , False ):
781 # For RTMP URLs, also include the playpath
782 self
. to_stdout ( info_dict
[ 'url' ] + info_dict
. get ( 'play_path' , '' ))
783 if self
. params
. get ( 'forcethumbnail' , False ) and info_dict
. get ( 'thumbnail' ) is not None :
784 self
. to_stdout ( info_dict
[ 'thumbnail' ])
785 if self
. params
. get ( 'forcedescription' , False ) and info_dict
. get ( 'description' ) is not None :
786 self
. to_stdout ( info_dict
[ 'description' ])
787 if self
. params
. get ( 'forcefilename' , False ) and filename
is not None :
788 self
. to_stdout ( filename
)
789 if self
. params
. get ( 'forceduration' , False ) and info_dict
. get ( 'duration' ) is not None :
790 self
. to_stdout ( formatSeconds ( info_dict
[ 'duration' ]))
791 if self
. params
. get ( 'forceformat' , False ):
792 self
. to_stdout ( info_dict
[ 'format' ])
793 if self
. params
. get ( 'forcejson' , False ):
794 info_dict
[ '_filename' ] = filename
795 self
. to_stdout ( json
. dumps ( info_dict
))
797 # Do nothing else if in simulate mode
798 if self
. params
. get ( 'simulate' , False ):
805 dn
= os
. path
. dirname ( encodeFilename ( filename
))
806 if dn
!= '' and not os
. path
. exists ( dn
):
808 except ( OSError , IOError ) as err
:
809 self
. report_error ( 'unable to create directory ' + compat_str ( err
))
812 if self
. params
. get ( 'writedescription' , False ):
813 descfn
= filename
+ '.description'
814 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( descfn
)):
815 self
. to_screen ( '[info] Video description is already present' )
818 self
. to_screen ( '[info] Writing video description to: ' + descfn
)
819 with io
. open ( encodeFilename ( descfn
), 'w' , encoding
= 'utf-8' ) as descfile
:
820 descfile
. write ( info_dict
[ 'description' ])
821 except ( KeyError , TypeError ):
822 self
. report_warning ( 'There \' s no description to write.' )
823 except ( OSError , IOError ):
824 self
. report_error ( 'Cannot write description file ' + descfn
)
827 if self
. params
. get ( 'writeannotations' , False ):
828 annofn
= filename
+ '.annotations.xml'
829 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( annofn
)):
830 self
. to_screen ( '[info] Video annotations are already present' )
833 self
. to_screen ( '[info] Writing video annotations to: ' + annofn
)
834 with io
. open ( encodeFilename ( annofn
), 'w' , encoding
= 'utf-8' ) as annofile
:
835 annofile
. write ( info_dict
[ 'annotations' ])
836 except ( KeyError , TypeError ):
837 self
. report_warning ( 'There are no annotations to write.' )
838 except ( OSError , IOError ):
839 self
. report_error ( 'Cannot write annotations file: ' + annofn
)
842 subtitles_are_requested
= any ([ self
. params
. get ( 'writesubtitles' , False ),
843 self
. params
. get ( 'writeautomaticsub' )])
845 if subtitles_are_requested
and 'subtitles' in info_dict
and info_dict
[ 'subtitles' ]:
846 # subtitles download errors are already managed as troubles in relevant IE
847 # that way it will silently go on when used with unsupporting IE
848 subtitles
= info_dict
[ 'subtitles' ]
849 sub_format
= self
. params
. get ( 'subtitlesformat' , 'srt' )
850 for sub_lang
in subtitles
. keys ():
851 sub
= subtitles
[ sub_lang
]
855 sub_filename
= subtitles_filename ( filename
, sub_lang
, sub_format
)
856 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( sub_filename
)):
857 self
. to_screen ( '[info] Video subtitle %s . %s is already_present' % ( sub_lang
, sub_format
))
859 self
. to_screen ( '[info] Writing video subtitles to: ' + sub_filename
)
860 with io
. open ( encodeFilename ( sub_filename
), 'w' , encoding
= 'utf-8' ) as subfile
:
862 except ( OSError , IOError ):
863 self
. report_error ( 'Cannot write subtitles file ' + descfn
)
866 if self
. params
. get ( 'writeinfojson' , False ):
867 infofn
= os
. path
. splitext ( filename
)[ 0 ] + '.info.json'
868 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( infofn
)):
869 self
. to_screen ( '[info] Video description metadata is already present' )
871 self
. to_screen ( '[info] Writing video description metadata as JSON to: ' + infofn
)
873 write_json_file ( info_dict
, encodeFilename ( infofn
))
874 except ( OSError , IOError ):
875 self
. report_error ( 'Cannot write metadata to JSON file ' + infofn
)
878 if self
. params
. get ( 'writethumbnail' , False ):
879 if info_dict
. get ( 'thumbnail' ) is not None :
880 thumb_format
= determine_ext ( info_dict
[ 'thumbnail' ], 'jpg' )
881 thumb_filename
= os
. path
. splitext ( filename
)[ 0 ] + '.' + thumb_format
882 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( thumb_filename
)):
883 self
. to_screen ( '[ %s ] %s : Thumbnail is already present' %
884 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
886 self
. to_screen ( '[ %s ] %s : Downloading thumbnail ...' %
887 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
889 uf
= compat_urllib_request
. urlopen ( info_dict
[ 'thumbnail' ])
890 with open ( thumb_filename
, 'wb' ) as thumbf
:
891 shutil
. copyfileobj ( uf
, thumbf
)
892 self
. to_screen ( '[ %s ] %s : Writing thumbnail to: %s ' %
893 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ], thumb_filename
))
894 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
895 self
. report_warning ( 'Unable to download thumbnail " %s ": %s ' %
896 ( info_dict
[ 'thumbnail' ], compat_str ( err
)))
898 if not self
. params
. get ( 'skip_download' , False ):
899 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( filename
)):
904 fd
= get_suitable_downloader ( info
)( self
, self
. params
)
905 for ph
in self
._ progress
_ hooks
:
906 fd
. add_progress_hook ( ph
)
907 return fd
. download ( name
, info
)
908 if info_dict
. get ( 'requested_formats' ) is not None :
911 for f
in info_dict
[ 'requested_formats' ]:
912 new_info
= dict ( info_dict
)
914 fname
= self
. prepare_filename ( new_info
)
915 fname
= prepend_extension ( fname
, 'f %s ' % f
[ 'format_id' ])
916 downloaded
. append ( fname
)
917 partial_success
= dl ( fname
, new_info
)
918 success
= success
and partial_success
919 info_dict
[ '__postprocessors' ] = [ FFmpegMergerPP ( self
)]
920 info_dict
[ '__files_to_merge' ] = downloaded
923 success
= dl ( filename
, info_dict
)
924 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
925 self
. report_error ( 'unable to download video data: %s ' % str ( err
))
927 except ( OSError , IOError ) as err
:
928 raise UnavailableVideoError ( err
)
929 except ( ContentTooShortError
, ) as err
:
930 self
. report_error ( 'content too short (expected %s bytes and served %s )' % ( err
. expected
, err
. downloaded
))
935 self
. post_process ( filename
, info_dict
)
936 except ( PostProcessingError
) as err
:
937 self
. report_error ( 'postprocessing: %s ' % str ( err
))
940 self
. record_download_archive ( info_dict
)
def download(self, url_list):
    """Download a given list of URLs.

    Every URL is passed to ``self.extract_info`` (which also performs the
    download).  Returns ``self._download_retcode``.

    Raises SameFileError when more than one URL is given, the ``outtmpl``
    parameter contains no ``%`` placeholder and ``max_downloads`` is not 1.
    """
    if (len(url_list) > 1
            and '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # NOTE(review): the statement after this message is not visible
            # in the extracted source; re-raising so the caller can stop is
            # assumed - confirm against the real file.
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously saved info file instead of a URL.

    The file is read as UTF-8 and its content is handed to
    ``self.process_ie_result(info, download=True)``.  If that raises
    DownloadError and the info carries a ``webpage_url``, a warning is
    reported and the URL is downloaded through ``self.download``; without
    a ``webpage_url`` the error propagates.

    Returns ``self._download_retcode`` (or the result of ``self.download``
    on the fallback path).
    """
    # Local import: no top-level ``import json`` is visible from this block.
    import json

    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # NOTE(review): the loading statement is not visible in the
        # extracted source; JSON is assumed because ``info`` is used as a
        # dict below - confirm against the real file.
        info = json.load(f)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            # Nothing to fall back to: let the original error propagate.
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is made of the postprocessors listed under
    ``ie_info['__postprocessors']`` (if any) followed by ``self._pps``.
    Each one is run with the info plus a ``'filepath'`` entry set to
    *filename*.  A PostProcessingError is reported through
    ``self.report_error`` and the chain continues.

    When the combined verdict of the postprocessors is ``False`` and the
    ``keepvideo`` parameter is not set, the original file is removed.
    """
    # NOTE(review): the initialisation lines are not visible in the
    # extracted source; a shallow copy is assumed so that adding
    # 'filepath' does not modify the caller's dict - confirm.
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None

    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)

    for pp in pps_chain:
        try:
            # NOTE(review): the info returned by the postprocessor is not
            # used here, so every postprocessor receives the same dict.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1002 def _make_archive_id ( self
, info_dict
):
1003 # Future-proof against any change in case
1004 # and backwards compatibility with prior versions
1005 extractor
= info_dict
. get ( 'extractor_key' )
1006 if extractor
is None :
1007 if 'id' in info_dict
:
1008 extractor
= info_dict
. get ( 'ie_key' ) # key in a playlist
1009 if extractor
is None :
1010 return None # Incomplete video information
1011 return extractor
. lower () + ' ' + info_dict
[ 'id' ]
def in_download_archive(self, info_dict):
    """Tell whether *info_dict* is already listed in the download archive.

    Returns False when no ``download_archive`` parameter is set, when no
    archive id can be built for *info_dict*, or when the archive file does
    not exist.  Any other IOError while reading the archive propagates.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # Only a missing archive file (ENOENT) is tolerated.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of *info_dict* to the download archive.

    Does nothing when no ``download_archive`` parameter is set.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return

    vid_id = self._make_archive_id(info_dict)
    # NOTE(review): the statement between computing the id and opening the
    # archive is not visible in the extracted source; a sanity check is
    # assumed, since a None id cannot be concatenated below - confirm.
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
# The signature has no ``self`` and the format listing calls this as
# ``self.format_resolution(format)``, so it has to be a static method.
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a printable resolution for a format dict.

    Order of preference: an explicit ``'resolution'`` entry,
    ``'<width>x<height>'``, ``'<height>p'``, ``'<width>x?'`` and finally
    *default*.  A width-only format is rendered as ``'<width>x?'`` so the
    known number sits in the width position (it used to be printed after
    the ``x``, where the height belongs).
    """
    if format.get('vcodec') == 'none':
        # NOTE(review): the returned text is not visible in the extracted
        # source; 'audio only' is assumed - confirm against the real file.
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        return '%sx?' % width
    return default
def list_formats(self, info_dict):
    """Print a table of the formats available for *info_dict*.

    The formats come from ``info_dict['formats']``; when that key is
    absent, *info_dict* itself is treated as the only format.  With more
    than one format, the first row is tagged ``(worst)`` and the last
    ``(best)``.  The table is written with ``self.to_screen``.

    An empty ``formats`` list prints the header only instead of raising
    ValueError from ``max()`` on an empty sequence.
    """
    def format_note(fdict):
        """Build the free-text "note" column for one format dict."""
        # NOTE(review): several separator literals in this helper ('@',
        # 'video@', 'audio', ', ') and the ``if res:`` guards are not
        # visible in the extracted source and are reconstructed from the
        # surrounding branches - confirm against the real file.
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += '%-5s' % fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(fmt, idlen=20):
        """Render one table row; the id column is ``idlen + 1`` wide."""
        # NOTE(review): the second column value is not visible in the
        # extracted source; ``fmt['ext']`` is assumed because the header
        # row supplies an 'ext' entry - confirm.
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            format_note(fmt),
        )

    formats = info_dict.get('formats', [info_dict])
    # Seed with the header width so an empty format list cannot break max().
    idlen = max([len('format code')] + [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Start an HTTP download.

    Opens *req* with the opener stored in ``self._opener`` and returns
    whatever that opener's ``open`` returns.
    """
    return self._opener.open(req)
def print_debug_header(self):
    """Write debugging information when the ``verbose`` parameter is set.

    Emits, through ``write_string``: the program version, the short git
    HEAD hash when it can be obtained, the Python version and platform,
    and the proxy map collected from the handlers of ``self._opener``.
    """
    if not self.params.get('verbose'):
        return
    write_string('[debug] youtube-dl version ' + __version__ + '\n')

    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match(r'[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # NOTE(review): the original handler is not visible in the
        # extracted source.  The git lookup is treated as best effort: a
        # failure to run git or decode its output must not stop the
        # header from being printed - confirm against the real file.
        pass

    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the URL opener used for requests and store it in ``self._opener``.

    Reads these parameters: ``socket_timeout`` (600 when unset),
    ``cookiefile``, ``proxy``, ``debug_printtraffic`` and
    ``nocheckcertificate``.  Also sets ``self.cookiejar``.

    Side effects beyond this object: the opener is installed globally via
    ``install_opener`` and the default socket timeout is changed.
    """
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load cookies when the file is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is not None:
        if opts_proxy == '':
            # NOTE(review): this branch body is not visible in the
            # extracted source; an empty proxy string is assumed to mean
            # "use no proxy at all" - confirm.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)