]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
28 compat_urllib_request
,
49 UnavailableVideoError
,
55 from . extractor
import get_info_extractor
, gen_extractors
56 from . FileDownloader
import FileDownloader
57 from . version
import __version__
60 class YoutubeDL ( object ):
63 YoutubeDL objects are the ones responsible for downloading the
64 actual video file and writing it to disk if the user has requested
65 it, among some other tasks. In most cases there should be one per
66 program. As, given a video URL, the downloader doesn't know how to
67 extract all the needed information, task that InfoExtractors do, it
68 has to pass the URL to one of them.
70 For this, YoutubeDL objects have a method that allows
71 InfoExtractors to be registered in a given order. When it is passed
72 a URL, the YoutubeDL object hands it to the first InfoExtractor it
73 finds that reports being able to handle it. The InfoExtractor extracts
74 all the information about the video or videos the URL refers to, and
75 YoutubeDL processes the extracted information, possibly using a File
76 Downloader to download the video.
78 YoutubeDL objects accept a lot of parameters. In order not to saturate
79 the object constructor with arguments, it receives a dictionary of
80 options instead. These options are available through the params
81 attribute for the InfoExtractors to use. The YoutubeDL also
82 registers itself as the downloader in charge for the InfoExtractors
83 that are added to it, so this is a "mutual registration".
87 username: Username for authentication purposes.
88 password: Password for authentication purposes.
89 videopassword: Password for accessing a video.
90 usenetrc: Use netrc for authentication instead.
91 verbose: Print additional info to stdout.
92 quiet: Do not print messages to stdout.
93 forceurl: Force printing final URL.
94 forcetitle: Force printing title.
95 forceid: Force printing ID.
96 forcethumbnail: Force printing thumbnail URL.
97 forcedescription: Force printing description.
98 forcefilename: Force printing final filename.
99 forceduration: Force printing duration.
100 forcejson: Force printing info_dict as JSON.
101 simulate: Do not download the video files.
102 format: Video format code.
103 format_limit: Highest quality format to try.
104 outtmpl: Template for output names.
105 restrictfilenames: Do not allow "&" and spaces in file names
106 ignoreerrors: Do not stop on download errors.
107 nooverwrites: Prevent overwriting files.
108 playliststart: Playlist item to start at.
109 playlistend: Playlist item to end at.
110 matchtitle: Download only matching titles.
111 rejecttitle: Reject downloads for matching titles.
112 logger: Log messages to a logging.Logger instance.
113 logtostderr: Log messages to stderr instead of stdout.
114 writedescription: Write the video description to a .description file
115 writeinfojson: Write the video description to a .info.json file
116 writeannotations: Write the video annotations to a .annotations.xml file
117 writethumbnail: Write the thumbnail image to a file
118 writesubtitles: Write the video subtitles to a file
119 writeautomaticsub: Write the automatic subtitles to a file
120 allsubtitles: Downloads all the subtitles of the video
121 (requires writesubtitles or writeautomaticsub)
122 listsubtitles: Lists all available subtitles for the video
123 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
124 subtitleslangs: List of languages of the subtitles to download
125 keepvideo: Keep the video file after post-processing
126 daterange: A DateRange object, download only if the upload_date is in the range.
127 skip_download: Skip the actual download of the video file
128 cachedir: Location of the cache files in the filesystem.
129 None to disable filesystem cache.
130 noplaylist: Download single video instead of a playlist if in doubt.
131 age_limit: An integer representing the user's age in years.
132 Unsuitable videos for the given age are skipped.
133 min_views: An integer representing the minimum view count the video
134 must have in order to not be skipped.
135 Videos without view count information are always
136 downloaded. None for no limit.
137 max_views: An integer representing the maximum view count.
138 Videos that are more popular than that are not
140 Videos without view count information are always
141 downloaded. None for no limit.
142 download_archive: File name of a file where all downloads are recorded.
143 Videos already present in the file are not downloaded
145 cookiefile: File name where cookies should be read from and dumped to.
146 nocheckcertificate:Do not verify SSL certificates
147 proxy: URL of the proxy server to use
148 socket_timeout: Time to wait for unresponsive hosts, in seconds
149 bidi_workaround: Work around buggy terminals without bidirectional text
150 support, using fribidi
152 The following parameters are not used by YoutubeDL itself, they are used by
154 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
155 noresizebuffer, retries, continuedl, noprogress, consoletitle
161 _download_retcode
= None
162 _num_downloads
= None
165 def __init__ ( self
, params
= None ):
166 """Create a YoutubeDL object with the given options."""
168 self
._ ies
_ instances
= {}
170 self
._ progress
_ hooks
= []
171 self
._ download
_ retcode
= 0
172 self
._ num
_ downloads
= 0
173 self
._ screen
_ file
= [ sys
. stdout
, sys
. stderr
][ params
. get ( 'logtostderr' , False )]
174 self
._ err
_ file
= sys
. stderr
175 self
. params
= {} if params
is None else params
177 if params
. get ( 'bidi_workaround' , False ):
180 master
, slave
= pty
. openpty ()
181 width
= get_term_width ()
185 width_args
= [ '-w' , str ( width
)]
186 self
._ fribidi
= subprocess
. Popen (
187 [ 'fribidi' , '-c' , 'UTF-8' ] + width_args
,
188 stdin
= subprocess
. PIPE
,
190 stderr
= self
._ err
_ file
)
191 self
._ fribidi
_ channel
= os
. fdopen ( master
, 'rb' )
192 except OSError as ose
:
194 self
. report_warning ( u
'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.' )
198 if ( sys
. version_info
>= ( 3 ,) and sys
. platform
!= 'win32' and
199 sys
. getfilesystemencoding () in [ 'ascii' , 'ANSI_X3.4-1968' ]
200 and not params
[ 'restrictfilenames' ]):
201 # On Python 3, the Unicode filesystem API will throw errors (#1474)
203 u
'Assuming --restrict-filenames since file system encoding '
204 u
'cannot encode all charactes. '
205 u
'Set the LC_ALL environment variable to fix this.' )
206 self
. params
[ 'restrictfilenames' ] = True
208 self
. fd
= FileDownloader ( self
, self
. params
)
210 if ' %(stitle)s ' in self
. params
. get ( 'outtmpl' , '' ):
211 self
. report_warning ( u
' %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.' )
215 def add_info_extractor ( self
, ie
):
216 """Add an InfoExtractor object to the end of the list."""
218 self
._ ies
_ instances
[ ie
. ie_key ()] = ie
219 ie
. set_downloader ( self
)
221 def get_info_extractor ( self
, ie_key
):
223 Get an instance of an IE with name ie_key, it will try to get one from
224 the _ies list, if there's no instance it will create a new one and add
225 it to the extractor list.
227 ie
= self
._ ies
_ instances
. get ( ie_key
)
229 ie
= get_info_extractor ( ie_key
)()
230 self
. add_info_extractor ( ie
)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list.

    Each extractor is registered through add_info_extractor, in the order
    gen_extractors yields them.
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
240 def add_post_processor ( self
, pp
):
241 """Add a PostProcessor object to the end of the chain."""
243 pp
. set_downloader ( self
)
def _bidi_workaround(self, message):
    """Pass message through the fribidi subprocess, if one was set up.

    When no ``_fribidi_channel`` attribute exists (the bidi workaround was
    not enabled in __init__) the message is returned untouched.  Otherwise
    the message is written, UTF-8 encoded and newline-terminated, to the
    fribidi process and one output line is read back per input line.

    message -- unicode text to convert; may span several lines.
    Returns the converted text without the trailing newline.
    """
    if not hasattr(self, '_fribidi_channel'):
        return message

    assert isinstance(message, type(u''))
    # One line is read back for every line sent, so count them up front.
    line_count = message.count(u'\n') + 1
    self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
    self._fribidi.stdin.flush()
    res = u''.join(self._fribidi_channel.readline().decode('utf-8')
                   for _ in range(line_count))
    # Drop the newline that was appended before sending.
    return res[:-len(u'\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file unless in quiet mode.

    Thin wrapper around to_stdout with check_quiet enabled.

    message  -- text to print.
    skip_eol -- if true, no newline is appended.
    Returns whatever to_stdout returns.
    """
    return self.to_stdout(message, skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' param is set, the message goes to its debug() method and
    nothing is written.  Otherwise the message is written to
    self._screen_file, unless check_quiet is true and the 'quiet' param
    is set.

    message     -- text to print.
    skip_eol    -- if true, no newline is appended.
    check_quiet -- if true, honour the 'quiet' param.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        terminator = u'' if skip_eol else u'\n'
        output = message + terminator

        write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr, or hand it to the logger.

    If a 'logger' param is set, the message goes to its error() method;
    otherwise it is written, newline-terminated, to self._err_file.

    message -- unicode text to print.
    """
    assert isinstance(message, type(u''))
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        message = self._bidi_workaround(message)
        output = message + u'\n'
        write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the console window title to message.

    Does nothing unless the 'consoletitle' param is set.  On Windows
    (when a console window exists) the title is set through the kernel32
    API; elsewhere, if TERM is in the environment, a title escape
    sequence is written to self._screen_file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Ask the terminal to push the current window title on its stack.

    Does nothing unless the 'consoletitle' param is set and TERM is in
    the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Ask the terminal to pop the window title saved by save_console_title.

    Does nothing unless the 'consoletitle' param is set and TERM is in
    the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
307 self
. save_console_title ()
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is only saved when a 'cookiefile' param was given.
    Exception details passed in *args are ignored and nothing is
    returned, so exceptions raised inside the with-block propagate.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
316 def trouble ( self
, message
= None , tb
= None ):
317 """Determine action to take when a download problem appears.
319 Depending on if the downloader has been configured to ignore
320 download errors or not, this method may throw an exception or
321 not when errors are found, after printing the message.
323 tb, if given, is additional traceback information.
325 if message
is not None :
326 self
. to_stderr ( message
)
327 if self
. params
. get ( 'verbose' ):
329 if sys
. exc_info ()[ 0 ]: # if .trouble has been called from an except block
331 if hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
332 tb
+= u
'' . join ( traceback
. format_exception (* sys
. exc_info ()[ 1 ]. exc_info
))
333 tb
+= compat_str ( traceback
. format_exc ())
335 tb_data
= traceback
. format_list ( traceback
. extract_stack ())
336 tb
= u
'' . join ( tb_data
)
338 if not self
. params
. get ( 'ignoreerrors' , False ):
339 if sys
. exc_info ()[ 0 ] and hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
340 exc_info
= sys
. exc_info ()[ 1 ]. exc_info
342 exc_info
= sys
. exc_info ()
343 raise DownloadError ( message
, exc_info
)
344 self
._ download
_ retcode
= 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    (coloring is never used on Windows).
    '''
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file (coloring is never used on Windows).

    tb is passed through to trouble unchanged.
    '''
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Increment the ordinal that assigns a number to each file."""
    self._num_downloads += 1
381 def prepare_filename ( self
, info_dict
):
382 """Generate the output filename."""
384 template_dict
= dict ( info_dict
)
386 template_dict
[ 'epoch' ] = int ( time
. time ())
387 autonumber_size
= self
. params
. get ( 'autonumber_size' )
388 if autonumber_size
is None :
390 autonumber_templ
= u
' %0 ' + str ( autonumber_size
) + u
'd'
391 template_dict
[ 'autonumber' ] = autonumber_templ
% self
._ num
_ downloads
392 if template_dict
. get ( 'playlist_index' ) is not None :
393 template_dict
[ 'playlist_index' ] = u
' %0 5d' % template_dict
[ 'playlist_index' ]
395 sanitize
= lambda k
, v
: sanitize_filename (
397 restricted
= self
. params
. get ( 'restrictfilenames' ),
399 template_dict
= dict (( k
, sanitize ( k
, v
))
400 for k
, v
in template_dict
. items ()
402 template_dict
= collections
. defaultdict ( lambda : u
'NA' , template_dict
)
404 tmpl
= os
. path
. expanduser ( self
. params
[ 'outtmpl' ])
405 filename
= tmpl
% template_dict
407 except ValueError as err
:
408 self
. report_error ( u
'Error in output template: ' + str ( err
) + u
' (encoding: ' + repr ( preferredencoding ()) + ')' )
def _match_entry(self, info_dict):
    """Return None iff the file should be downloaded.

    Otherwise return a unicode string explaining why the entry is skipped.
    The checks, in order: 'matchtitle' / 'rejecttitle' patterns (only when
    the entry has a title), 'daterange' (only when it has an upload_date),
    'min_views' / 'max_views' (only when it has a view_count), 'age_limit',
    and finally the download archive.
    """
    # Best available label for messages; falls back to the id, then u'video'.
    video_title = info_dict.get('title', info_dict.get('id', u'video'))
    if 'title' in info_dict:
        # The title can be missing when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # video_title rather than title: title is only bound when the
            # entry actually carries a 'title' key.
            return u'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return u'%s has already been recorded in archive' % video_title
    return None
# NOTE(review): callers invoke this as self.add_extra_info(info, extra) with
# two arguments, so inside the class it must be declared as a @staticmethod.
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
453 def extract_info ( self
, url
, download
= True , ie_key
= None , extra_info
={},
456 Returns a list with a dictionary for each video we find.
457 If 'download', also downloads the videos.
458 extra_info is a dict containing the extra values to add to each result
462 ies
= [ self
. get_info_extractor ( ie_key
)]
467 if not ie
. suitable ( url
):
471 self
. report_warning ( u
'The program functionality for this site has been marked as broken, '
472 u
'and will probably not work.' )
475 ie_result
= ie
. extract ( url
)
476 if ie_result
is None : # Finished already (backwards compatibility; listformats and friends should be moved here)
478 if isinstance ( ie_result
, list ):
479 # Backwards compatibility: old IE result format
481 '_type' : 'compat_list' ,
482 'entries' : ie_result
,
484 self
. add_extra_info ( ie_result
,
486 'extractor' : ie
. IE_NAME
,
488 'webpage_url_basename' : url_basename ( url
),
489 'extractor_key' : ie
. ie_key (),
492 return self
. process_ie_result ( ie_result
, download
, extra_info
)
495 except ExtractorError
as de
: # An error we somewhat expected
496 self
. report_error ( compat_str ( de
), de
. format_traceback ())
498 except Exception as e
:
499 if self
. params
. get ( 'ignoreerrors' , False ):
500 self
. report_error ( compat_str ( e
), tb
= compat_str ( traceback
. format_exc ()))
505 self
. report_error ( u
'no suitable InfoExtractor: %s ' % url
)
507 def process_ie_result ( self
, ie_result
, download
= True , extra_info
={}):
509 Take the result of the ie(may be modified) and resolve all unresolved
510 references (URLs, playlist items).
512 It will also download the videos if 'download'.
513 Returns the resolved ie_result.
516 result_type
= ie_result
. get ( '_type' , 'video' ) # If not given we suppose it's a video, support the default old system
517 if result_type
== 'video' :
518 self
. add_extra_info ( ie_result
, extra_info
)
519 return self
. process_video_result ( ie_result
, download
= download
)
520 elif result_type
== 'url' :
521 # We have to add extra_info to the results because it may be
522 # contained in a playlist
523 return self
. extract_info ( ie_result
[ 'url' ],
525 ie_key
= ie_result
. get ( 'ie_key' ),
526 extra_info
= extra_info
)
527 elif result_type
== 'url_transparent' :
528 # Use the information from the embedding page
529 info
= self
. extract_info (
530 ie_result
[ 'url' ], ie_key
= ie_result
. get ( 'ie_key' ),
531 extra_info
= extra_info
, download
= False , process
= False )
533 def make_result ( embedded_info
):
534 new_result
= ie_result
. copy ()
535 for f
in ( '_type' , 'url' , 'ext' , 'player_url' , 'formats' ,
536 'entries' , 'urlhandle' , 'ie_key' , 'duration' ,
537 'subtitles' , 'annotations' , 'format' ,
538 'thumbnail' , 'thumbnails' ):
541 if f
in embedded_info
:
542 new_result
[ f
] = embedded_info
[ f
]
544 new_result
= make_result ( info
)
546 assert new_result
. get ( '_type' ) != 'url_transparent'
547 if new_result
. get ( '_type' ) == 'compat_list' :
548 new_result
[ 'entries' ] = [
549 make_result ( e
) for e
in new_result
[ 'entries' ]]
551 return self
. process_ie_result (
552 new_result
, download
= download
, extra_info
= extra_info
)
553 elif result_type
== 'playlist' :
554 # We process each entry in the playlist
555 playlist
= ie_result
. get ( 'title' , None ) or ie_result
. get ( 'id' , None )
556 self
. to_screen ( u
'[download] Downloading playlist: %s ' % playlist
)
558 playlist_results
= []
560 n_all_entries
= len ( ie_result
[ 'entries' ])
561 playliststart
= self
. params
. get ( 'playliststart' , 1 ) - 1
562 playlistend
= self
. params
. get ( 'playlistend' , None )
563 # For backwards compatibility, interpret -1 as whole list
564 if playlistend
== - 1 :
567 entries
= ie_result
[ 'entries' ][ playliststart
: playlistend
]
568 n_entries
= len ( entries
)
571 u
"[ %s ] playlist ' %s ': Collected %d video ids (downloading %d of them)" %
572 ( ie_result
[ 'extractor' ], playlist
, n_all_entries
, n_entries
))
574 for i
, entry
in enumerate ( entries
, 1 ):
575 self
. to_screen ( u
'[download] Downloading video # %s of %s ' % ( i
, n_entries
))
577 'playlist' : playlist
,
578 'playlist_index' : i
+ playliststart
,
579 'extractor' : ie_result
[ 'extractor' ],
580 'webpage_url' : ie_result
[ 'webpage_url' ],
581 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
582 'extractor_key' : ie_result
[ 'extractor_key' ],
585 reason
= self
._ match
_ entry
( entry
)
586 if reason
is not None :
587 self
. to_screen ( u
'[download] ' + reason
)
590 entry_result
= self
. process_ie_result ( entry
,
593 playlist_results
. append ( entry_result
)
594 ie_result
[ 'entries' ] = playlist_results
596 elif result_type
== 'compat_list' :
598 self
. add_extra_info ( r
,
600 'extractor' : ie_result
[ 'extractor' ],
601 'webpage_url' : ie_result
[ 'webpage_url' ],
602 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
603 'extractor_key' : ie_result
[ 'extractor_key' ],
606 ie_result
[ 'entries' ] = [
607 self
. process_ie_result ( _fixup ( r
), download
, extra_info
)
608 for r
in ie_result
[ 'entries' ]
612 raise Exception ( 'Invalid result type: %s ' % result_type
)
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats according to format_spec.

    format_spec       -- 'best' or None (last entry), 'worst' (first entry),
                         a file extension (mp4, flv, webm, 3gp) or a
                         format_id.
    available_formats -- list of format dicts, worst first.

    Returns the chosen format dict, or None if nothing matches.

    NOTE(review): the lines after the `matches` computation were lost in
    this copy of the file; returning the last match or None is
    reconstructed from the caller's `is not None` check and from 'best'
    being the last entry -- confirm against upstream.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    else:
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        # An extension spec filters on 'ext', anything else on 'format_id'.
        key = 'ext' if format_spec in extensions else 'format_id'
        matches = [f for f in available_formats if f[key] == format_spec]
        if matches:
            return matches[-1]
    return None
630 def process_video_result ( self
, info_dict
, download
= True ):
631 assert info_dict
. get ( '_type' , 'video' ) == 'video'
633 if 'playlist' not in info_dict
:
634 # It isn't part of a playlist
635 info_dict
[ 'playlist' ] = None
636 info_dict
[ 'playlist_index' ] = None
638 # These extractors handle format selection themselves
639 if info_dict
[ 'extractor' ] in [ u
'youtube' , u
'Youku' ]:
641 self
. process_info ( info_dict
)
644 # We now pick which formats have to be downloaded
645 if info_dict
. get ( 'formats' ) is None :
646 # There's only one format available
647 formats
= [ info_dict
]
649 formats
= info_dict
[ 'formats' ]
651 # We check that all the formats have the format and format_id fields
652 for ( i
, format
) in enumerate ( formats
):
653 if format
. get ( 'format_id' ) is None :
654 format
[ 'format_id' ] = compat_str ( i
)
655 if format
. get ( 'format' ) is None :
656 format
[ 'format' ] = u
' {id} - {res}{note} ' . format (
657 id = format
[ 'format_id' ],
658 res
= self
. format_resolution ( format
),
659 note
= u
' ( {0} )' . format ( format
[ 'format_note' ]) if format
. get ( 'format_note' ) is not None else '' ,
661 # Automatically determine file extension if missing
662 if 'ext' not in format
:
663 format
[ 'ext' ] = determine_ext ( format
[ 'url' ])
665 if self
. params
. get ( 'listformats' , None ):
666 self
. list_formats ( info_dict
)
669 format_limit
= self
. params
. get ( 'format_limit' , None )
671 formats
= list ( takewhile_inclusive (
672 lambda f
: f
[ 'format_id' ] != format_limit
, formats
674 if self
. params
. get ( 'prefer_free_formats' ):
675 def _free_formats_key ( f
):
677 ext_ord
= [ u
'flv' , u
'mp4' , u
'webm' ]. index ( f
[ 'ext' ])
680 # We only compare the extension if they have the same height and width
681 return ( f
. get ( 'height' ), f
. get ( 'width' ), ext_ord
)
682 formats
= sorted ( formats
, key
= _free_formats_key
)
684 req_format
= self
. params
. get ( 'format' , 'best' )
685 if req_format
is None :
687 formats_to_download
= []
688 # The -1 is for supporting YoutubeIE
689 if req_format
in ( '-1' , 'all' ):
690 formats_to_download
= formats
692 # We can accept formats requested in the format: 34/5/best, we pick
693 # the first that is available, starting from left
694 req_formats
= req_format
. split ( '/' )
695 for rf
in req_formats
:
696 selected_format
= self
. select_format ( rf
, formats
)
697 if selected_format
is not None :
698 formats_to_download
= [ selected_format
]
700 if not formats_to_download
:
701 raise ExtractorError ( u
'requested format not available' ,
705 if len ( formats_to_download
) > 1 :
706 self
. to_screen ( u
'[info] %s : downloading video in %s formats' % ( info_dict
[ 'id' ], len ( formats_to_download
)))
707 for format
in formats_to_download
:
708 new_info
= dict ( info_dict
)
709 new_info
. update ( format
)
710 self
. process_info ( new_info
)
711 # We update the info dict with the best quality format (backwards compatibility)
712 info_dict
. update ( formats_to_download
[- 1 ])
715 def process_info ( self
, info_dict
):
716 """Process a single resolved IE result."""
718 assert info_dict
. get ( '_type' , 'video' ) == 'video'
719 # We increment the download count here to match the previous behaviour.
720 self
. increment_downloads ()
722 info_dict
[ 'fulltitle' ] = info_dict
[ 'title' ]
723 if len ( info_dict
[ 'title' ]) > 200 :
724 info_dict
[ 'title' ] = info_dict
[ 'title' ][: 197 ] + u
'...'
726 # Keep for backwards compatibility
727 info_dict
[ 'stitle' ] = info_dict
[ 'title' ]
729 if not 'format' in info_dict
:
730 info_dict
[ 'format' ] = info_dict
[ 'ext' ]
732 reason
= self
._ match
_ entry
( info_dict
)
733 if reason
is not None :
734 self
. to_screen ( u
'[download] ' + reason
)
737 max_downloads
= self
. params
. get ( 'max_downloads' )
738 if max_downloads
is not None :
739 if self
._ num
_ downloads
> int ( max_downloads
):
740 raise MaxDownloadsReached ()
742 filename
= self
. prepare_filename ( info_dict
)
745 if self
. params
. get ( 'forcetitle' , False ):
746 self
. to_stdout ( info_dict
[ 'fulltitle' ])
747 if self
. params
. get ( 'forceid' , False ):
748 self
. to_stdout ( info_dict
[ 'id' ])
749 if self
. params
. get ( 'forceurl' , False ):
750 # For RTMP URLs, also include the playpath
751 self
. to_stdout ( info_dict
[ 'url' ] + info_dict
. get ( 'play_path' , u
'' ))
752 if self
. params
. get ( 'forcethumbnail' , False ) and info_dict
. get ( 'thumbnail' ) is not None :
753 self
. to_stdout ( info_dict
[ 'thumbnail' ])
754 if self
. params
. get ( 'forcedescription' , False ) and info_dict
. get ( 'description' ) is not None :
755 self
. to_stdout ( info_dict
[ 'description' ])
756 if self
. params
. get ( 'forcefilename' , False ) and filename
is not None :
757 self
. to_stdout ( filename
)
758 if self
. params
. get ( 'forceduration' , False ) and info_dict
. get ( 'duration' ) is not None :
759 self
. to_stdout ( formatSeconds ( info_dict
[ 'duration' ]))
760 if self
. params
. get ( 'forceformat' , False ):
761 self
. to_stdout ( info_dict
[ 'format' ])
762 if self
. params
. get ( 'forcejson' , False ):
763 info_dict
[ '_filename' ] = filename
764 self
. to_stdout ( json
. dumps ( info_dict
))
766 # Do nothing else if in simulate mode
767 if self
. params
. get ( 'simulate' , False ):
774 dn
= os
. path
. dirname ( encodeFilename ( filename
))
775 if dn
!= '' and not os
. path
. exists ( dn
):
777 except ( OSError , IOError ) as err
:
778 self
. report_error ( u
'unable to create directory ' + compat_str ( err
))
781 if self
. params
. get ( 'writedescription' , False ):
782 descfn
= filename
+ u
'.description'
783 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( descfn
)):
784 self
. to_screen ( u
'[info] Video description is already present' )
787 self
. to_screen ( u
'[info] Writing video description to: ' + descfn
)
788 with io
. open ( encodeFilename ( descfn
), 'w' , encoding
= 'utf-8' ) as descfile
:
789 descfile
. write ( info_dict
[ 'description' ])
790 except ( KeyError , TypeError ):
791 self
. report_warning ( u
'There \' s no description to write.' )
792 except ( OSError , IOError ):
793 self
. report_error ( u
'Cannot write description file ' + descfn
)
796 if self
. params
. get ( 'writeannotations' , False ):
797 annofn
= filename
+ u
'.annotations.xml'
798 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( annofn
)):
799 self
. to_screen ( u
'[info] Video annotations are already present' )
802 self
. to_screen ( u
'[info] Writing video annotations to: ' + annofn
)
803 with io
. open ( encodeFilename ( annofn
), 'w' , encoding
= 'utf-8' ) as annofile
:
804 annofile
. write ( info_dict
[ 'annotations' ])
805 except ( KeyError , TypeError ):
806 self
. report_warning ( u
'There are no annotations to write.' )
807 except ( OSError , IOError ):
808 self
. report_error ( u
'Cannot write annotations file: ' + annofn
)
811 subtitles_are_requested
= any ([ self
. params
. get ( 'writesubtitles' , False ),
812 self
. params
. get ( 'writeautomaticsub' )])
814 if subtitles_are_requested
and 'subtitles' in info_dict
and info_dict
[ 'subtitles' ]:
815 # subtitles download errors are already managed as troubles in relevant IE
816 # that way it will silently go on when used with unsupporting IE
817 subtitles
= info_dict
[ 'subtitles' ]
818 sub_format
= self
. params
. get ( 'subtitlesformat' , 'srt' )
819 for sub_lang
in subtitles
. keys ():
820 sub
= subtitles
[ sub_lang
]
824 sub_filename
= subtitles_filename ( filename
, sub_lang
, sub_format
)
825 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( sub_filename
)):
826 self
. to_screen ( u
'[info] Video subtitle %s . %s is already_present' % ( sub_lang
, sub_format
))
828 self
. to_screen ( u
'[info] Writing video subtitles to: ' + sub_filename
)
829 with io
. open ( encodeFilename ( sub_filename
), 'w' , encoding
= 'utf-8' ) as subfile
:
831 except ( OSError , IOError ):
832 self
. report_error ( u
'Cannot write subtitles file ' + descfn
)
835 if self
. params
. get ( 'writeinfojson' , False ):
836 infofn
= os
. path
. splitext ( filename
)[ 0 ] + u
'.info.json'
837 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( infofn
)):
838 self
. to_screen ( u
'[info] Video description metadata is already present' )
840 self
. to_screen ( u
'[info] Writing video description metadata as JSON to: ' + infofn
)
842 json_info_dict
= dict (( k
, v
) for k
, v
in info_dict
. items () if not k
in [ 'urlhandle' ])
843 write_json_file ( json_info_dict
, encodeFilename ( infofn
))
844 except ( OSError , IOError ):
845 self
. report_error ( u
'Cannot write metadata to JSON file ' + infofn
)
848 if self
. params
. get ( 'writethumbnail' , False ):
849 if info_dict
. get ( 'thumbnail' ) is not None :
850 thumb_format
= determine_ext ( info_dict
[ 'thumbnail' ], u
'jpg' )
851 thumb_filename
= os
. path
. splitext ( filename
)[ 0 ] + u
'.' + thumb_format
852 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( thumb_filename
)):
853 self
. to_screen ( u
'[ %s ] %s : Thumbnail is already present' %
854 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
856 self
. to_screen ( u
'[ %s ] %s : Downloading thumbnail ...' %
857 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
859 uf
= compat_urllib_request
. urlopen ( info_dict
[ 'thumbnail' ])
860 with open ( thumb_filename
, 'wb' ) as thumbf
:
861 shutil
. copyfileobj ( uf
, thumbf
)
862 self
. to_screen ( u
'[ %s ] %s : Writing thumbnail to: %s ' %
863 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ], thumb_filename
))
864 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
865 self
. report_warning ( u
'Unable to download thumbnail " %s ": %s ' %
866 ( info_dict
[ 'thumbnail' ], compat_str ( err
)))
868 if not self
. params
. get ( 'skip_download' , False ):
869 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( filename
)):
873 success
= self
. fd
._ do
_ download
( filename
, info_dict
)
874 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
875 self
. report_error ( u
'unable to download video data: %s ' % str ( err
))
877 except ( OSError , IOError ) as err
:
878 raise UnavailableVideoError ( err
)
879 except ( ContentTooShortError
, ) as err
:
880 self
. report_error ( u
'content too short (expected %s bytes and served %s )' % ( err
. expected
, err
. downloaded
))
885 self
. post_process ( filename
, info_dict
)
886 except ( PostProcessingError
) as err
:
887 self
. report_error ( u
'postprocessing: %s ' % str ( err
))
890 self
. record_download_archive ( info_dict
)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL is given, the output
    template contains no '%' field and 'max_downloads' is not 1.
    MaxDownloadsReached is reported on screen and then re-raised.

    Returns the value of self._download_retcode.
    """
    outtmpl = self.params['outtmpl']
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously saved info JSON file as a download request.

    The file is read as UTF-8 JSON and handed to process_ie_result() with
    download=True. If that raises DownloadError and the info contains a
    'webpage_url', a warning is printed and the download is retried from
    that URL; without a 'webpage_url' the error is re-raised.

    Returns the value of self._download_retcode (or the result of the
    fallback download() call).
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            # Nothing to fall back to
            raise
        self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor receives a copy of ie_info extended with a
    'filepath' entry. A postprocessor may express a wish to keep (True)
    or drop (False) the original file; a "keep" wish always wins, a
    "drop" wish only counts while nothing has been decided yet.
    The original file is removed when the final decision is False and
    the 'keepvideo' parameter is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
948 def _make_archive_id ( self
, info_dict
):
949 # Future-proof against any change in case
950 # and backwards compatibility with prior versions
951 extractor
= info_dict
. get ( 'extractor_key' )
952 if extractor
is None :
953 if 'id' in info_dict
:
954 extractor
= info_dict
. get ( 'ie_key' ) # key in a playlist
955 if extractor
is None :
956 return None # Incomplete video information
957 return extractor
. lower () + u
' ' + info_dict
[ 'id' ]
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    Returns False when no 'download_archive' parameter is set, when the
    archive id cannot be built from info_dict, or when the archive file
    does not exist yet. Other IOErrors are propagated.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' parameter is set. The id
    produced by _make_archive_id() must be non-empty.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Order of precedence: 'audio only' when 'vcodec' is 'none', then an
    explicit '_resolution' entry, then WIDTHxHEIGHT, then HEIGHTp, and
    finally the given default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('_resolution') is not None:
        return format['_resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = u'%sx%s' % (format['width'], format['height'])
        else:
            res = u'%sp' % format['height']
    else:
        res = default
    return res
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are taken from info_dict['formats'], or info_dict itself
    is treated as the only format when that key is absent. With more
    than one format, the first row is tagged '(worst)' and the last
    '(best)'.
    """
    def format_note(fdict):
        # Free-form note column: format note, codecs, bitrates, file size
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += u'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One table row; the id column is padded to idlen + 1 characters
        return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # Single max() over header width and ids, so an empty list is fine
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """Start an HTTP download.

    Opens req through this object's own opener (self._opener) and
    returns whatever that opener's open() returns.
    """
    return self._opener.open(req)
def print_debug_header(self):
    """Write version and environment details when 'verbose' is set.

    Emits the youtube-dl version, the short git HEAD hash (when the
    package directory is a git checkout and git is runnable), the Python
    version with platform name, and the proxy map collected from the
    opener's handlers. Does nothing when 'verbose' is not set.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this is not a checkout
        pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener and store it as self._opener.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' parameters. Sets self.cookiejar, installs the
    opener globally and sets the default socket timeout (600 seconds
    when 'socket_timeout' is not given).
    """
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicit empty proxy disables proxying altogether
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)