]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
, unicode_literals
28 compat_urllib_request
,
50 UnavailableVideoError
,
57 from . extractor
import get_info_extractor
, gen_extractors
58 from . downloader
import get_suitable_downloader
59 from . postprocessor
import FFmpegMergerPP
60 from . version
import __version__
63 class YoutubeDL ( object ):
66 YoutubeDL objects are the ones responsible of downloading the
67 actual video file and writing it to disk if the user has requested
68 it, among some other tasks. In most cases there should be one per
69 program. As, given a video URL, the downloader doesn't know how to
70 extract all the needed information, task that InfoExtractors do, it
71 has to pass the URL to one of them.
73 For this, YoutubeDL objects have a method that allows
74 InfoExtractors to be registered in a given order. When it is passed
75 a URL, the YoutubeDL object handles it to the first InfoExtractor it
76 finds that reports being able to handle it. The InfoExtractor extracts
77 all the information about the video or videos the URL refers to, and
78 YoutubeDL process the extracted information, possibly using a File
79 Downloader to download the video.
81 YoutubeDL objects accept a lot of parameters. In order not to saturate
82 the object constructor with arguments, it receives a dictionary of
83 options instead. These options are available through the params
84 attribute for the InfoExtractors to use. The YoutubeDL also
85 registers itself as the downloader in charge for the InfoExtractors
86 that are added to it, so this is a "mutual registration".
90 username: Username for authentication purposes.
91 password: Password for authentication purposes.
92 videopassword: Password for accessing a video.
93 usenetrc: Use netrc for authentication instead.
94 verbose: Print additional info to stdout.
95 quiet: Do not print messages to stdout.
96 forceurl: Force printing final URL.
97 forcetitle: Force printing title.
98 forceid: Force printing ID.
99 forcethumbnail: Force printing thumbnail URL.
100 forcedescription: Force printing description.
101 forcefilename: Force printing final filename.
102 forceduration: Force printing duration.
103 forcejson: Force printing info_dict as JSON.
104 simulate: Do not download the video files.
105 format: Video format code.
106 format_limit: Highest quality format to try.
107 outtmpl: Template for output names.
108 restrictfilenames: Do not allow "&" and spaces in file names
109 ignoreerrors: Do not stop on download errors.
110 nooverwrites: Prevent overwriting files.
111 playliststart: Playlist item to start at.
112 playlistend: Playlist item to end at.
113 matchtitle: Download only matching titles.
114 rejecttitle: Reject downloads for matching titles.
115 logger: Log messages to a logging.Logger instance.
116 logtostderr: Log messages to stderr instead of stdout.
117 writedescription: Write the video description to a .description file
118 writeinfojson: Write the video description to a .info.json file
119 writeannotations: Write the video annotations to a .annotations.xml file
120 writethumbnail: Write the thumbnail image to a file
121 writesubtitles: Write the video subtitles to a file
122 writeautomaticsub: Write the automatic subtitles to a file
123 allsubtitles: Downloads all the subtitles of the video
124 (requires writesubtitles or writeautomaticsub)
125 listsubtitles: Lists all available subtitles for the video
126 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
127 subtitleslangs: List of languages of the subtitles to download
128 keepvideo: Keep the video file after post-processing
129 daterange: A DateRange object, download only if the upload_date is in the range.
130 skip_download: Skip the actual download of the video file
131 cachedir: Location of the cache files in the filesystem.
132 None to disable filesystem cache.
133 noplaylist: Download single video instead of a playlist if in doubt.
134 age_limit: An integer representing the user's age in years.
135 Unsuitable videos for the given age are skipped.
136 min_views: An integer representing the minimum view count the video
137 must have in order to not be skipped.
138 Videos without view count information are always
139 downloaded. None for no limit.
140 max_views: An integer representing the maximum view count.
141 Videos that are more popular than that are not
143 Videos without view count information are always
144 downloaded. None for no limit.
145 download_archive: File name of a file where all downloads are recorded.
146 Videos already present in the file are not downloaded
148 cookiefile: File name where cookies should be read from and dumped to.
149 nocheckcertificate:Do not verify SSL certificates
150 proxy: URL of the proxy server to use
151 socket_timeout: Time to wait for unresponsive hosts, in seconds
152 bidi_workaround: Work around buggy terminals without bidirectional text
153 support, using fribidi
154 debug_printtraffic:Print out sent and received HTTP traffic
155 include_ads: Download ads as well
156 default_search: Prepend this string if an input url is not valid.
157 'auto' for elaborate guessing
159 The following parameters are not used by YoutubeDL itself, they are used by
161 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
162 noresizebuffer, retries, continuedl, noprogress, consoletitle
164 The following options are used by the post processors:
165 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
166 otherwise prefer avconv.
172 _download_retcode
= None
173 _num_downloads
= None
176 def __init__ ( self
, params
= None ):
177 """Create a FileDownloader object with the given options."""
181 self
._ ies
_ instances
= {}
183 self
._ progress
_ hooks
= []
184 self
._ download
_ retcode
= 0
185 self
._ num
_ downloads
= 0
186 self
._ screen
_ file
= [ sys
. stdout
, sys
. stderr
][ params
. get ( 'logtostderr' , False )]
187 self
._ err
_ file
= sys
. stderr
190 if params
. get ( 'bidi_workaround' , False ):
193 master
, slave
= pty
. openpty ()
194 width
= get_term_width ()
198 width_args
= [ '-w' , str ( width
)]
200 stdin
= subprocess
. PIPE
,
202 stderr
= self
._ err
_ file
)
204 self
._ output
_ process
= subprocess
. Popen (
205 [ 'bidiv' ] + width_args
, ** sp_kwargs
208 self
._ output
_ process
= subprocess
. Popen (
209 [ 'fribidi' , '-c' , 'UTF-8' ] + width_args
, ** sp_kwargs
)
210 self
._ output
_ channel
= os
. fdopen ( master
, 'rb' )
211 except OSError as ose
:
213 self
. report_warning ( 'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.' )
217 if ( sys
. version_info
>= ( 3 ,) and sys
. platform
!= 'win32' and
218 sys
. getfilesystemencoding () in [ 'ascii' , 'ANSI_X3.4-1968' ]
219 and not params
[ 'restrictfilenames' ]):
220 # On Python 3, the Unicode filesystem API will throw errors (#1474)
222 'Assuming --restrict-filenames since file system encoding '
223 'cannot encode all charactes. '
224 'Set the LC_ALL environment variable to fix this.' )
225 self
. params
[ 'restrictfilenames' ] = True
227 if ' %(stitle)s ' in self
. params
. get ( 'outtmpl' , '' ):
228 self
. report_warning ( ' %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.' )
232 def add_info_extractor ( self
, ie
):
233 """Add an InfoExtractor object to the end of the list."""
235 self
._ ies
_ instances
[ ie
. ie_key ()] = ie
236 ie
. set_downloader ( self
)
238 def get_info_extractor ( self
, ie_key
):
240 Get an instance of an IE with name ie_key, it will try to get one from
241 the _ies list, if there's no instance it will create a new one and add
242 it to the extractor list.
244 ie
= self
._ ies
_ instances
. get ( ie_key
)
246 ie
= get_info_extractor ( ie_key
)()
247 self
. add_info_extractor ( ie
)
250 def add_default_info_extractors ( self
):
252 Add the InfoExtractors returned by gen_extractors to the end of the list
254 for ie
in gen_extractors ():
255 self
. add_info_extractor ( ie
)
257 def add_post_processor ( self
, pp
):
258 """Add a PostProcessor object to the end of the chain."""
260 pp
. set_downloader ( self
)
262 def add_progress_hook ( self
, ph
):
263 """Add the progress hook (currently only for the file downloader)"""
264 self
._ progress
_ hooks
. append ( ph
)
266 def _bidi_workaround ( self
, message
):
267 if not hasattr ( self
, '_output_channel' ):
270 assert hasattr ( self
, '_output_process' )
271 assert type ( message
) == type ( '' )
272 line_count
= message
. count ( ' \n ' ) + 1
273 self
._ output
_ process
. stdin
. write (( message
+ ' \n ' ). encode ( 'utf-8' ))
274 self
._ output
_ process
. stdin
. flush ()
275 res
= '' . join ( self
._ output
_ channel
. readline (). decode ( 'utf-8' )
276 for _
in range ( line_count
))
277 return res
[:- len ( ' \n ' )]
279 def to_screen ( self
, message
, skip_eol
= False ):
280 """Print message to stdout if not in quiet mode."""
281 return self
. to_stdout ( message
, skip_eol
, check_quiet
= True )
283 def to_stdout ( self
, message
, skip_eol
= False , check_quiet
= False ):
284 """Print message to stdout if not in quiet mode."""
285 if self
. params
. get ( 'logger' ):
286 self
. params
[ 'logger' ]. debug ( message
)
287 elif not check_quiet
or not self
. params
. get ( 'quiet' , False ):
288 message
= self
._ bidi
_ workaround
( message
)
289 terminator
= [ ' \n ' , '' ][ skip_eol
]
290 output
= message
+ terminator
292 write_string ( output
, self
._ screen
_ file
)
294 def to_stderr ( self
, message
):
295 """Print message to stderr."""
296 assert type ( message
) == type ( '' )
297 if self
. params
. get ( 'logger' ):
298 self
. params
[ 'logger' ]. error ( message
)
300 message
= self
._ bidi
_ workaround
( message
)
301 output
= message
+ ' \n '
302 write_string ( output
, self
._ err
_ file
)
304 def to_console_title ( self
, message
):
305 if not self
. params
. get ( 'consoletitle' , False ):
307 if os
. name
== 'nt' and ctypes
. windll
. kernel32
. GetConsoleWindow ():
308 # c_wchar_p() might not be necessary if `message` is
309 # already of type unicode()
310 ctypes
. windll
. kernel32
. SetConsoleTitleW ( ctypes
. c_wchar_p ( message
))
311 elif 'TERM' in os
. environ
:
312 write_string ( ' \033 ]0; %s \007 ' % message
, self
._ screen
_ file
)
314 def save_console_title ( self
):
315 if not self
. params
. get ( 'consoletitle' , False ):
317 if 'TERM' in os
. environ
:
318 # Save the title on stack
319 write_string ( ' \033 [22;0t' , self
._ screen
_ file
)
321 def restore_console_title ( self
):
322 if not self
. params
. get ( 'consoletitle' , False ):
324 if 'TERM' in os
. environ
:
325 # Restore the title from stack
326 write_string ( ' \033 [23;0t' , self
._ screen
_ file
)
329 self
. save_console_title ()
332 def __exit__ ( self
, * args
):
333 self
. restore_console_title ()
335 if self
. params
. get ( 'cookiefile' ) is not None :
336 self
. cookiejar
. save ()
338 def trouble ( self
, message
= None , tb
= None ):
339 """Determine action to take when a download problem appears.
341 Depending on if the downloader has been configured to ignore
342 download errors or not, this method may throw an exception or
343 not when errors are found, after printing the message.
345 tb, if given, is additional traceback information.
347 if message
is not None :
348 self
. to_stderr ( message
)
349 if self
. params
. get ( 'verbose' ):
351 if sys
. exc_info ()[ 0 ]: # if .trouble has been called from an except block
353 if hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
354 tb
+= '' . join ( traceback
. format_exception (* sys
. exc_info ()[ 1 ]. exc_info
))
355 tb
+= compat_str ( traceback
. format_exc ())
357 tb_data
= traceback
. format_list ( traceback
. extract_stack ())
358 tb
= '' . join ( tb_data
)
360 if not self
. params
. get ( 'ignoreerrors' , False ):
361 if sys
. exc_info ()[ 0 ] and hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
362 exc_info
= sys
. exc_info ()[ 1 ]. exc_info
364 exc_info
= sys
. exc_info ()
365 raise DownloadError ( message
, exc_info
)
366 self
._ download
_ retcode
= 1
368 def report_warning ( self
, message
):
370 Print the message to stderr, it will be prefixed with 'WARNING:'
371 If stderr is a tty file the 'WARNING:' will be colored
373 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
374 _msg_header
= ' \033 [0;33mWARNING: \033 [0m'
376 _msg_header
= 'WARNING:'
377 warning_message
= ' %s %s ' % ( _msg_header
, message
)
378 self
. to_stderr ( warning_message
)
380 def report_error ( self
, message
, tb
= None ):
382 Do the same as trouble, but prefixes the message with 'ERROR:', colored
383 in red if stderr is a tty file.
385 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
386 _msg_header
= ' \033 [0;31mERROR: \033 [0m'
388 _msg_header
= 'ERROR:'
389 error_message
= ' %s %s ' % ( _msg_header
, message
)
390 self
. trouble ( error_message
, tb
)
392 def report_file_already_downloaded ( self
, file_name
):
393 """Report file has already been fully downloaded."""
395 self
. to_screen ( '[download] %s has already been downloaded' % file_name
)
396 except UnicodeEncodeError :
397 self
. to_screen ( '[download] The file has already been downloaded' )
399 def prepare_filename ( self
, info_dict
):
400 """Generate the output filename."""
402 template_dict
= dict ( info_dict
)
404 template_dict
[ 'epoch' ] = int ( time
. time ())
405 autonumber_size
= self
. params
. get ( 'autonumber_size' )
406 if autonumber_size
is None :
408 autonumber_templ
= ' %0 ' + str ( autonumber_size
) + 'd'
409 template_dict
[ 'autonumber' ] = autonumber_templ
% self
._ num
_ downloads
410 if template_dict
. get ( 'playlist_index' ) is not None :
411 template_dict
[ 'playlist_index' ] = ' %0 5d' % template_dict
[ 'playlist_index' ]
413 sanitize
= lambda k
, v
: sanitize_filename (
415 restricted
= self
. params
. get ( 'restrictfilenames' ),
417 template_dict
= dict (( k
, sanitize ( k
, v
))
418 for k
, v
in template_dict
. items ()
420 template_dict
= collections
. defaultdict ( lambda : 'NA' , template_dict
)
422 tmpl
= os
. path
. expanduser ( self
. params
[ 'outtmpl' ])
423 filename
= tmpl
% template_dict
425 except ValueError as err
:
426 self
. report_error ( 'Error in output template: ' + str ( err
) + ' (encoding: ' + repr ( preferredencoding ()) + ')' )
429 def _match_entry ( self
, info_dict
):
430 """ Returns None iff the file should be downloaded """
432 video_title
= info_dict
. get ( 'title' , info_dict
. get ( 'id' , 'video' ))
433 if 'title' in info_dict
:
434 # This can happen when we're just evaluating the playlist
435 title
= info_dict
[ 'title' ]
436 matchtitle
= self
. params
. get ( 'matchtitle' , False )
438 if not re
. search ( matchtitle
, title
, re
. IGNORECASE
):
439 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
440 rejecttitle
= self
. params
. get ( 'rejecttitle' , False )
442 if re
. search ( rejecttitle
, title
, re
. IGNORECASE
):
443 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
444 date
= info_dict
. get ( 'upload_date' , None )
446 dateRange
= self
. params
. get ( 'daterange' , DateRange ())
447 if date
not in dateRange
:
448 return ' %s upload date is not in range %s ' % ( date_from_str ( date
). isoformat (), dateRange
)
449 view_count
= info_dict
. get ( 'view_count' , None )
450 if view_count
is not None :
451 min_views
= self
. params
. get ( 'min_views' )
452 if min_views
is not None and view_count
< min_views
:
453 return 'Skipping %s , because it has not reached minimum view count ( %d / %d )' % ( video_title
, view_count
, min_views
)
454 max_views
= self
. params
. get ( 'max_views' )
455 if max_views
is not None and view_count
> max_views
:
456 return 'Skipping %s , because it has exceeded the maximum view count ( %d / %d )' % ( video_title
, view_count
, max_views
)
457 age_limit
= self
. params
. get ( 'age_limit' )
458 if age_limit
is not None :
459 if age_limit
< info_dict
. get ( 'age_limit' , 0 ):
460 return 'Skipping "' + title
+ '" because it is age restricted'
461 if self
. in_download_archive ( info_dict
):
462 return ' %s has already been recorded in archive' % video_title
466 def add_extra_info ( info_dict
, extra_info
):
467 '''Set the keys from extra_info in info dict if they are missing'''
468 for key
, value
in extra_info
. items ():
469 info_dict
. setdefault ( key
, value
)
471 def extract_info ( self
, url
, download
= True , ie_key
= None , extra_info
={},
474 Returns a list with a dictionary for each video we find.
475 If 'download', also downloads the videos.
476 extra_info is a dict containing the extra values to add to each result
480 ies
= [ self
. get_info_extractor ( ie_key
)]
485 if not ie
. suitable ( url
):
489 self
. report_warning ( 'The program functionality for this site has been marked as broken, '
490 'and will probably not work.' )
493 ie_result
= ie
. extract ( url
)
494 if ie_result
is None : # Finished already (backwards compatibility; listformats and friends should be moved here)
496 if isinstance ( ie_result
, list ):
497 # Backwards compatibility: old IE result format
499 '_type' : 'compat_list' ,
500 'entries' : ie_result
,
502 self
. add_extra_info ( ie_result
,
504 'extractor' : ie
. IE_NAME
,
506 'webpage_url_basename' : url_basename ( url
),
507 'extractor_key' : ie
. ie_key (),
510 return self
. process_ie_result ( ie_result
, download
, extra_info
)
513 except ExtractorError
as de
: # An error we somewhat expected
514 self
. report_error ( compat_str ( de
), de
. format_traceback ())
516 except MaxDownloadsReached
:
518 except Exception as e
:
519 if self
. params
. get ( 'ignoreerrors' , False ):
520 self
. report_error ( compat_str ( e
), tb
= compat_str ( traceback
. format_exc ()))
525 self
. report_error ( 'no suitable InfoExtractor: %s ' % url
)
527 def process_ie_result ( self
, ie_result
, download
= True , extra_info
={}):
529 Take the result of the ie(may be modified) and resolve all unresolved
530 references (URLs, playlist items).
532 It will also download the videos if 'download'.
533 Returns the resolved ie_result.
536 result_type
= ie_result
. get ( '_type' , 'video' ) # If not given we suppose it's a video, support the default old system
537 if result_type
== 'video' :
538 self
. add_extra_info ( ie_result
, extra_info
)
539 return self
. process_video_result ( ie_result
, download
= download
)
540 elif result_type
== 'url' :
541 # We have to add extra_info to the results because it may be
542 # contained in a playlist
543 return self
. extract_info ( ie_result
[ 'url' ],
545 ie_key
= ie_result
. get ( 'ie_key' ),
546 extra_info
= extra_info
)
547 elif result_type
== 'url_transparent' :
548 # Use the information from the embedding page
549 info
= self
. extract_info (
550 ie_result
[ 'url' ], ie_key
= ie_result
. get ( 'ie_key' ),
551 extra_info
= extra_info
, download
= False , process
= False )
553 def make_result ( embedded_info
):
554 new_result
= ie_result
. copy ()
555 for f
in ( '_type' , 'url' , 'ext' , 'player_url' , 'formats' ,
556 'entries' , 'ie_key' , 'duration' ,
557 'subtitles' , 'annotations' , 'format' ,
558 'thumbnail' , 'thumbnails' ):
561 if f
in embedded_info
:
562 new_result
[ f
] = embedded_info
[ f
]
564 new_result
= make_result ( info
)
566 assert new_result
. get ( '_type' ) != 'url_transparent'
567 if new_result
. get ( '_type' ) == 'compat_list' :
568 new_result
[ 'entries' ] = [
569 make_result ( e
) for e
in new_result
[ 'entries' ]]
571 return self
. process_ie_result (
572 new_result
, download
= download
, extra_info
= extra_info
)
573 elif result_type
== 'playlist' :
574 # We process each entry in the playlist
575 playlist
= ie_result
. get ( 'title' , None ) or ie_result
. get ( 'id' , None )
576 self
. to_screen ( '[download] Downloading playlist: %s ' % playlist
)
578 playlist_results
= []
580 playliststart
= self
. params
. get ( 'playliststart' , 1 ) - 1
581 playlistend
= self
. params
. get ( 'playlistend' , None )
582 # For backwards compatibility, interpret -1 as whole list
583 if playlistend
== - 1 :
586 if isinstance ( ie_result
[ 'entries' ], list ):
587 n_all_entries
= len ( ie_result
[ 'entries' ])
588 entries
= ie_result
[ 'entries' ][ playliststart
: playlistend
]
589 n_entries
= len ( entries
)
591 "[ %s ] playlist %s : Collected %d video ids (downloading %d of them)" %
592 ( ie_result
[ 'extractor' ], playlist
, n_all_entries
, n_entries
))
594 assert isinstance ( ie_result
[ 'entries' ], PagedList
)
595 entries
= ie_result
[ 'entries' ]. getslice (
596 playliststart
, playlistend
)
597 n_entries
= len ( entries
)
599 "[ %s ] playlist %s : Downloading %d videos" %
600 ( ie_result
[ 'extractor' ], playlist
, n_entries
))
602 for i
, entry
in enumerate ( entries
, 1 ):
603 self
. to_screen ( '[download] Downloading video # %s of %s ' % ( i
, n_entries
))
605 'playlist' : playlist
,
606 'playlist_index' : i
+ playliststart
,
607 'extractor' : ie_result
[ 'extractor' ],
608 'webpage_url' : ie_result
[ 'webpage_url' ],
609 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
610 'extractor_key' : ie_result
[ 'extractor_key' ],
613 reason
= self
._ match
_ entry
( entry
)
614 if reason
is not None :
615 self
. to_screen ( '[download] ' + reason
)
618 entry_result
= self
. process_ie_result ( entry
,
621 playlist_results
. append ( entry_result
)
622 ie_result
[ 'entries' ] = playlist_results
624 elif result_type
== 'compat_list' :
626 self
. add_extra_info ( r
,
628 'extractor' : ie_result
[ 'extractor' ],
629 'webpage_url' : ie_result
[ 'webpage_url' ],
630 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
631 'extractor_key' : ie_result
[ 'extractor_key' ],
634 ie_result
[ 'entries' ] = [
635 self
. process_ie_result ( _fixup ( r
), download
, extra_info
)
636 for r
in ie_result
[ 'entries' ]
640 raise Exception ( 'Invalid result type: %s ' % result_type
)
642 def select_format ( self
, format_spec
, available_formats
):
643 if format_spec
== 'best' or format_spec
is None :
644 return available_formats
[- 1 ]
645 elif format_spec
== 'worst' :
646 return available_formats
[ 0 ]
647 elif format_spec
== 'bestaudio' :
649 f
for f
in available_formats
650 if f
. get ( 'vcodec' ) == 'none' ]
652 return audio_formats
[- 1 ]
653 elif format_spec
== 'worstaudio' :
655 f
for f
in available_formats
656 if f
. get ( 'vcodec' ) == 'none' ]
658 return audio_formats
[ 0 ]
660 extensions
= [ 'mp4' , 'flv' , 'webm' , '3gp' ]
661 if format_spec
in extensions
:
662 filter_f
= lambda f
: f
[ 'ext' ] == format_spec
664 filter_f
= lambda f
: f
[ 'format_id' ] == format_spec
665 matches
= list ( filter ( filter_f
, available_formats
))
670 def process_video_result ( self
, info_dict
, download
= True ):
671 assert info_dict
. get ( '_type' , 'video' ) == 'video'
673 if 'playlist' not in info_dict
:
674 # It isn't part of a playlist
675 info_dict
[ 'playlist' ] = None
676 info_dict
[ 'playlist_index' ] = None
678 # This extractors handle format selection themselves
679 if info_dict
[ 'extractor' ] in [ 'Youku' ]:
681 self
. process_info ( info_dict
)
684 # We now pick which formats have to be downloaded
685 if info_dict
. get ( 'formats' ) is None :
686 # There's only one format available
687 formats
= [ info_dict
]
689 formats
= info_dict
[ 'formats' ]
691 # We check that all the formats have the format and format_id fields
692 for ( i
, format
) in enumerate ( formats
):
693 if format
. get ( 'format_id' ) is None :
694 format
[ 'format_id' ] = compat_str ( i
)
695 if format
. get ( 'format' ) is None :
696 format
[ 'format' ] = ' {id} - {res}{note} ' . format (
697 id = format
[ 'format_id' ],
698 res
= self
. format_resolution ( format
),
699 note
= ' ( {0} )' . format ( format
[ 'format_note' ]) if format
. get ( 'format_note' ) is not None else '' ,
701 # Automatically determine file extension if missing
702 if 'ext' not in format
:
703 format
[ 'ext' ] = determine_ext ( format
[ 'url' ])
705 format_limit
= self
. params
. get ( 'format_limit' , None )
707 formats
= list ( takewhile_inclusive (
708 lambda f
: f
[ 'format_id' ] != format_limit
, formats
711 # TODO Central sorting goes here
713 if formats
[ 0 ] is not info_dict
:
714 # only set the 'formats' fields if the original info_dict list them
715 # otherwise we end up with a circular reference, the first (and unique)
716 # element in the 'formats' field in info_dict is info_dict itself,
717 # which can't be exported to json
718 info_dict
[ 'formats' ] = formats
719 if self
. params
. get ( 'listformats' , None ):
720 self
. list_formats ( info_dict
)
723 req_format
= self
. params
. get ( 'format' )
724 if req_format
is None :
726 formats_to_download
= []
727 # The -1 is for supporting YoutubeIE
728 if req_format
in ( '-1' , 'all' ):
729 formats_to_download
= formats
731 # We can accept formats requested in the format: 34/5/best, we pick
732 # the first that is available, starting from left
733 req_formats
= req_format
. split ( '/' )
734 for rf
in req_formats
:
735 if re
. match ( r
'.+?\+.+?' , rf
) is not None :
736 # Two formats have been requested like '137+139'
737 format_1
, format_2
= rf
. split ( '+' )
738 formats_info
= ( self
. select_format ( format_1
, formats
),
739 self
. select_format ( format_2
, formats
))
740 if all ( formats_info
):
742 'requested_formats' : formats_info
,
744 'ext' : formats_info
[ 0 ][ 'ext' ],
747 selected_format
= None
749 selected_format
= self
. select_format ( rf
, formats
)
750 if selected_format
is not None :
751 formats_to_download
= [ selected_format
]
753 if not formats_to_download
:
754 raise ExtractorError ( 'requested format not available' ,
758 if len ( formats_to_download
) > 1 :
759 self
. to_screen ( '[info] %s : downloading video in %s formats' % ( info_dict
[ 'id' ], len ( formats_to_download
)))
760 for format
in formats_to_download
:
761 new_info
= dict ( info_dict
)
762 new_info
. update ( format
)
763 self
. process_info ( new_info
)
764 # We update the info dict with the best quality format (backwards compatibility)
765 info_dict
. update ( formats_to_download
[- 1 ])
768 def process_info ( self
, info_dict
):
769 """Process a single resolved IE result."""
771 assert info_dict
. get ( '_type' , 'video' ) == 'video'
773 max_downloads
= self
. params
. get ( 'max_downloads' )
774 if max_downloads
is not None :
775 if self
._ num
_ downloads
>= int ( max_downloads
):
776 raise MaxDownloadsReached ()
778 info_dict
[ 'fulltitle' ] = info_dict
[ 'title' ]
779 if len ( info_dict
[ 'title' ]) > 200 :
780 info_dict
[ 'title' ] = info_dict
[ 'title' ][: 197 ] + '...'
782 # Keep for backwards compatibility
783 info_dict
[ 'stitle' ] = info_dict
[ 'title' ]
785 if not 'format' in info_dict
:
786 info_dict
[ 'format' ] = info_dict
[ 'ext' ]
788 reason
= self
._ match
_ entry
( info_dict
)
789 if reason
is not None :
790 self
. to_screen ( '[download] ' + reason
)
793 self
._ num
_ downloads
+= 1
795 filename
= self
. prepare_filename ( info_dict
)
798 if self
. params
. get ( 'forcetitle' , False ):
799 self
. to_stdout ( info_dict
[ 'fulltitle' ])
800 if self
. params
. get ( 'forceid' , False ):
801 self
. to_stdout ( info_dict
[ 'id' ])
802 if self
. params
. get ( 'forceurl' , False ):
803 # For RTMP URLs, also include the playpath
804 self
. to_stdout ( info_dict
[ 'url' ] + info_dict
. get ( 'play_path' , '' ))
805 if self
. params
. get ( 'forcethumbnail' , False ) and info_dict
. get ( 'thumbnail' ) is not None :
806 self
. to_stdout ( info_dict
[ 'thumbnail' ])
807 if self
. params
. get ( 'forcedescription' , False ) and info_dict
. get ( 'description' ) is not None :
808 self
. to_stdout ( info_dict
[ 'description' ])
809 if self
. params
. get ( 'forcefilename' , False ) and filename
is not None :
810 self
. to_stdout ( filename
)
811 if self
. params
. get ( 'forceduration' , False ) and info_dict
. get ( 'duration' ) is not None :
812 self
. to_stdout ( formatSeconds ( info_dict
[ 'duration' ]))
813 if self
. params
. get ( 'forceformat' , False ):
814 self
. to_stdout ( info_dict
[ 'format' ])
815 if self
. params
. get ( 'forcejson' , False ):
816 info_dict
[ '_filename' ] = filename
817 self
. to_stdout ( json
. dumps ( info_dict
))
819 # Do nothing else if in simulate mode
820 if self
. params
. get ( 'simulate' , False ):
827 dn
= os
. path
. dirname ( encodeFilename ( filename
))
828 if dn
!= '' and not os
. path
. exists ( dn
):
830 except ( OSError , IOError ) as err
:
831 self
. report_error ( 'unable to create directory ' + compat_str ( err
))
834 if self
. params
. get ( 'writedescription' , False ):
835 descfn
= filename
+ '.description'
836 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( descfn
)):
837 self
. to_screen ( '[info] Video description is already present' )
840 self
. to_screen ( '[info] Writing video description to: ' + descfn
)
841 with io
. open ( encodeFilename ( descfn
), 'w' , encoding
= 'utf-8' ) as descfile
:
842 descfile
. write ( info_dict
[ 'description' ])
843 except ( KeyError , TypeError ):
844 self
. report_warning ( 'There \' s no description to write.' )
845 except ( OSError , IOError ):
846 self
. report_error ( 'Cannot write description file ' + descfn
)
849 if self
. params
. get ( 'writeannotations' , False ):
850 annofn
= filename
+ '.annotations.xml'
851 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( annofn
)):
852 self
. to_screen ( '[info] Video annotations are already present' )
855 self
. to_screen ( '[info] Writing video annotations to: ' + annofn
)
856 with io
. open ( encodeFilename ( annofn
), 'w' , encoding
= 'utf-8' ) as annofile
:
857 annofile
. write ( info_dict
[ 'annotations' ])
858 except ( KeyError , TypeError ):
859 self
. report_warning ( 'There are no annotations to write.' )
860 except ( OSError , IOError ):
861 self
. report_error ( 'Cannot write annotations file: ' + annofn
)
864 subtitles_are_requested
= any ([ self
. params
. get ( 'writesubtitles' , False ),
865 self
. params
. get ( 'writeautomaticsub' )])
867 if subtitles_are_requested
and 'subtitles' in info_dict
and info_dict
[ 'subtitles' ]:
868 # subtitles download errors are already managed as troubles in relevant IE
869 # that way it will silently go on when used with unsupporting IE
870 subtitles
= info_dict
[ 'subtitles' ]
871 sub_format
= self
. params
. get ( 'subtitlesformat' , 'srt' )
872 for sub_lang
in subtitles
. keys ():
873 sub
= subtitles
[ sub_lang
]
877 sub_filename
= subtitles_filename ( filename
, sub_lang
, sub_format
)
878 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( sub_filename
)):
879 self
. to_screen ( '[info] Video subtitle %s . %s is already_present' % ( sub_lang
, sub_format
))
881 self
. to_screen ( '[info] Writing video subtitles to: ' + sub_filename
)
882 with io
. open ( encodeFilename ( sub_filename
), 'w' , encoding
= 'utf-8' ) as subfile
:
884 except ( OSError , IOError ):
885 self
. report_error ( 'Cannot write subtitles file ' + descfn
)
888 if self
. params
. get ( 'writeinfojson' , False ):
889 infofn
= os
. path
. splitext ( filename
)[ 0 ] + '.info.json'
890 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( infofn
)):
891 self
. to_screen ( '[info] Video description metadata is already present' )
893 self
. to_screen ( '[info] Writing video description metadata as JSON to: ' + infofn
)
895 write_json_file ( info_dict
, encodeFilename ( infofn
))
896 except ( OSError , IOError ):
897 self
. report_error ( 'Cannot write metadata to JSON file ' + infofn
)
900 if self
. params
. get ( 'writethumbnail' , False ):
901 if info_dict
. get ( 'thumbnail' ) is not None :
902 thumb_format
= determine_ext ( info_dict
[ 'thumbnail' ], 'jpg' )
903 thumb_filename
= os
. path
. splitext ( filename
)[ 0 ] + '.' + thumb_format
904 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( thumb_filename
)):
905 self
. to_screen ( '[ %s ] %s : Thumbnail is already present' %
906 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
908 self
. to_screen ( '[ %s ] %s : Downloading thumbnail ...' %
909 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
911 uf
= compat_urllib_request
. urlopen ( info_dict
[ 'thumbnail' ])
912 with open ( thumb_filename
, 'wb' ) as thumbf
:
913 shutil
. copyfileobj ( uf
, thumbf
)
914 self
. to_screen ( '[ %s ] %s : Writing thumbnail to: %s ' %
915 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ], thumb_filename
))
916 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
917 self
. report_warning ( 'Unable to download thumbnail " %s ": %s ' %
918 ( info_dict
[ 'thumbnail' ], compat_str ( err
)))
920 if not self
. params
. get ( 'skip_download' , False ):
921 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( filename
)):
926 fd
= get_suitable_downloader ( info
)( self
, self
. params
)
927 for ph
in self
._ progress
_ hooks
:
928 fd
. add_progress_hook ( ph
)
929 return fd
. download ( name
, info
)
930 if info_dict
. get ( 'requested_formats' ) is not None :
933 merger
= FFmpegMergerPP ( self
)
934 if not merger
._ get
_ executable
():
936 self
. report_warning ( 'You have requested multiple '
937 'formats but ffmpeg or avconv are not installed.'
938 ' The formats won \' t be merged' )
940 postprocessors
= [ merger
]
941 for f
in info_dict
[ 'requested_formats' ]:
942 new_info
= dict ( info_dict
)
944 fname
= self
. prepare_filename ( new_info
)
945 fname
= prepend_extension ( fname
, 'f %s ' % f
[ 'format_id' ])
946 downloaded
. append ( fname
)
947 partial_success
= dl ( fname
, new_info
)
948 success
= success
and partial_success
949 info_dict
[ '__postprocessors' ] = postprocessors
950 info_dict
[ '__files_to_merge' ] = downloaded
953 success
= dl ( filename
, info_dict
)
954 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
955 self
. report_error ( 'unable to download video data: %s ' % str ( err
))
957 except ( OSError , IOError ) as err
:
958 raise UnavailableVideoError ( err
)
959 except ( ContentTooShortError
, ) as err
:
960 self
. report_error ( 'content too short (expected %s bytes and served %s )' % ( err
. expected
, err
. downloaded
))
965 self
. post_process ( filename
, info_dict
)
966 except ( PostProcessingError
) as err
:
967 self
. report_error ( 'postprocessing: %s ' % str ( err
))
970 self
. record_download_archive ( info_dict
)
def download(self, url_list):
    """Download a given list of URLs.

    Each URL is handed to extract_info(), which also performs the actual
    download. Returns the accumulated return code (self._download_retcode).
    Raises SameFileError when multiple URLs would all be written to a single
    literal (placeholder-free) output template.
    """
    if (len(url_list) > 1 and
            '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        # A fixed filename cannot hold more than one download.
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        # NOTE(review): the `for`/`try` lines were lost in extraction
        # (extract_info referenced an unbound `url`); restored to match
        # the visible except clauses — verify against upstream.
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously saved .info.json file as if freshly extracted.

    When processing the stored info fails with DownloadError, falls back to
    re-downloading from the recorded webpage_url (if any); otherwise the
    error propagates. Returns self._download_retcode.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # NOTE(review): this assignment was dropped in extraction; `info`
        # is read below, so it must be loaded here — verify against upstream.
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    # Work on a copy so postprocessors cannot mutate the caller's dict.
    info = dict(ie_info)
    info['filepath'] = filename
    # Tri-state: None = no decision yet, True/False = explicit wish.
    keep_video = None
    pps_chain = []
    # Per-download postprocessors (e.g. the format merger stashed under
    # '__postprocessors') run before the globally registered ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        # NOTE(review): the keep_video init and `try:`/`is True` lines were
        # lost in extraction; restored — verify against upstream.
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish is True:
                    # An explicit request to keep the file always wins.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1032 def _make_archive_id ( self
, info_dict
):
1033 # Future-proof against any change in case
1034 # and backwards compatibility with prior versions
1035 extractor
= info_dict
. get ( 'extractor_key' )
1036 if extractor
is None :
1037 if 'id' in info_dict
:
1038 extractor
= info_dict
. get ( 'ie_key' ) # key in a playlist
1039 if extractor
is None :
1040 return None # Incomplete video information
1041 return extractor
. lower () + ' ' + info_dict
[ 'id' ]
def in_download_archive(self, info_dict):
    """Return True when info_dict's archive id is already recorded in the
    download-archive file configured via params['download_archive']."""
    fn = self.params.get('download_archive')
    if fn is None:
        # No archive configured: nothing is ever "already downloaded".
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    # NOTE(review): the guard/`try:`/return lines were lost in extraction;
    # restored to match the visible IOError handling — verify upstream.
    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download-archive file, if one
    is configured; otherwise do nothing."""
    fn = self.params.get('download_archive')
    if fn is None:
        # NOTE(review): this early-return guard was lost in extraction;
        # restored — without it a None path would crash locked_file.
        return
    vid_id = self._make_archive_id(info_dict)
    # Callers only record successfully downloaded videos, so the id must
    # be constructible here.
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution label for a format dict.

    Prefers an explicit 'resolution' value; otherwise derives 'WxH', 'Hp'
    or '?xW' from the available dimensions. Audio-only formats (vcodec ==
    'none') yield 'audio only'; with no usable fields, *default* is used.
    NOTE: `format` shadows the builtin — kept for interface compatibility.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    else:
        # NOTE(review): the else/return lines were lost in extraction;
        # restored so `res` is always bound — verify against upstream.
        res = default
    return res
def list_formats(self, info_dict):
    """Print a table of the available formats for info_dict to the screen.

    NOTE(review): several separator/return lines of the nested helpers were
    lost in extraction; the ', ' joiners, 'video only'/'audio' labels and
    final returns are restored from context — verify against upstream.
    """
    def format_note(fdict):
        # Build a comma-separated description of codecs/bitrates/size.
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One fixed-width table row: id, extension, resolution, note.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # Column width: widest format id, but at least the header's width.
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are ordered worst-first by the extractor.
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write version/platform/proxy debug info; no-op unless verbose."""
    if not self.params.get('verbose'):
        return
    write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        # Best-effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # git not installed / not a checkout — debug info is optional.
        # NOTE(review): the original handler lines were lost in extraction;
        # narrowed from a presumed bare except — verify against upstream.
        pass
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build and install the urllib opener (cookies, proxies, HTTPS).

    Stores the opener on self._opener and the cookie jar on self.cookiejar.
    NOTE(review): several else/assignment lines were lost in extraction;
    restored from the surrounding structure — verify against upstream.
    """
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        # In-memory jar: cookies are discarded at exit.
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # Explicit empty proxy option disables all proxying.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)