]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
, unicode_literals
31 compat_urllib_request
,
55 UnavailableVideoError
,
62 from . cache
import Cache
63 from . extractor
import get_info_extractor
, gen_extractors
64 from . downloader
import get_suitable_downloader
65 from . postprocessor
import FFmpegMergerPP
, FFmpegPostProcessor
66 from . version
import __version__
69 class YoutubeDL ( object ):
72 YoutubeDL objects are the ones responsible for downloading the
73 actual video file and writing it to disk if the user has requested
74 it, among some other tasks. In most cases there should be one per
75 program. As, given a video URL, the downloader doesn't know how to
76 extract all the needed information, task that InfoExtractors do, it
77 has to pass the URL to one of them.
79 For this, YoutubeDL objects have a method that allows
80 InfoExtractors to be registered in a given order. When it is passed
81 a URL, the YoutubeDL object hands it to the first InfoExtractor it
82 finds that reports being able to handle it. The InfoExtractor extracts
83 all the information about the video or videos the URL refers to, and
84 YoutubeDL processes the extracted information, possibly using a File
85 Downloader to download the video.
87 YoutubeDL objects accept a lot of parameters. In order not to saturate
88 the object constructor with arguments, it receives a dictionary of
89 options instead. These options are available through the params
90 attribute for the InfoExtractors to use. The YoutubeDL also
91 registers itself as the downloader in charge for the InfoExtractors
92 that are added to it, so this is a "mutual registration".
96 username: Username for authentication purposes.
97 password: Password for authentication purposes.
98 videopassword: Password for accessing a video.
99 usenetrc: Use netrc for authentication instead.
100 verbose: Print additional info to stdout.
101 quiet: Do not print messages to stdout.
102 no_warnings: Do not print out anything for warnings.
103 forceurl: Force printing final URL.
104 forcetitle: Force printing title.
105 forceid: Force printing ID.
106 forcethumbnail: Force printing thumbnail URL.
107 forcedescription: Force printing description.
108 forcefilename: Force printing final filename.
109 forceduration: Force printing duration.
110 forcejson: Force printing info_dict as JSON.
111 dump_single_json: Force printing the info_dict of the whole playlist
112 (or video) as a single JSON line.
113 simulate: Do not download the video files.
114 format: Video format code.
115 format_limit: Highest quality format to try.
116 outtmpl: Template for output names.
117 restrictfilenames: Do not allow "&" and spaces in file names
118 ignoreerrors: Do not stop on download errors.
119 nooverwrites: Prevent overwriting files.
120 playliststart: Playlist item to start at.
121 playlistend: Playlist item to end at.
122 matchtitle: Download only matching titles.
123 rejecttitle: Reject downloads for matching titles.
124 logger: Log messages to a logging.Logger instance.
125 logtostderr: Log messages to stderr instead of stdout.
126 writedescription: Write the video description to a .description file
127 writeinfojson: Write the video description to a .info.json file
128 writeannotations: Write the video annotations to a .annotations.xml file
129 writethumbnail: Write the thumbnail image to a file
130 writesubtitles: Write the video subtitles to a file
131 writeautomaticsub: Write the automatic subtitles to a file
132 allsubtitles: Downloads all the subtitles of the video
133 (requires writesubtitles or writeautomaticsub)
134 listsubtitles: Lists all available subtitles for the video
135 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
136 subtitleslangs: List of languages of the subtitles to download
137 keepvideo: Keep the video file after post-processing
138 daterange: A DateRange object, download only if the upload_date is in the range.
139 skip_download: Skip the actual download of the video file
140 cachedir: Location of the cache files in the filesystem.
141 False to disable filesystem cache.
142 noplaylist: Download single video instead of a playlist if in doubt.
143 age_limit: An integer representing the user's age in years.
144 Unsuitable videos for the given age are skipped.
145 min_views: An integer representing the minimum view count the video
146 must have in order to not be skipped.
147 Videos without view count information are always
148 downloaded. None for no limit.
149 max_views: An integer representing the maximum view count.
150 Videos that are more popular than that are not
152 Videos without view count information are always
153 downloaded. None for no limit.
154 download_archive: File name of a file where all downloads are recorded.
155 Videos already present in the file are not downloaded
157 cookiefile: File name where cookies should be read from and dumped to.
158 nocheckcertificate:Do not verify SSL certificates
159 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
160 At the moment, this is only supported by YouTube.
161 proxy: URL of the proxy server to use
162 socket_timeout: Time to wait for unresponsive hosts, in seconds
163 bidi_workaround: Work around buggy terminals without bidirectional text
164 support, using fribidi
165 debug_printtraffic:Print out sent and received HTTP traffic
166 include_ads: Download ads as well
167 default_search: Prepend this string if an input url is not valid.
168 'auto' for elaborate guessing
169 encoding: Use this encoding instead of the system-specified.
170 extract_flat: Do not resolve URLs, return the immediate result.
171 Pass in 'in_playlist' to only show this behavior for
174 The following parameters are not used by YoutubeDL itself, they are used by
176 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
177 noresizebuffer, retries, continuedl, noprogress, consoletitle
179 The following options are used by the post processors:
180 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
181 otherwise prefer avconv.
182 exec_cmd: Arbitrary command to run after downloading
188 _download_retcode
= None
189 _num_downloads
= None
192 def __init__ ( self
, params
= None , auto_init
= True ):
193 """Create a FileDownloader object with the given options."""
197 self
._ ies
_ instances
= {}
199 self
._ progress
_ hooks
= []
200 self
._ download
_ retcode
= 0
201 self
._ num
_ downloads
= 0
202 self
._ screen
_ file
= [ sys
. stdout
, sys
. stderr
][ params
. get ( 'logtostderr' , False )]
203 self
._ err
_ file
= sys
. stderr
205 self
. cache
= Cache ( self
)
207 if params
. get ( 'bidi_workaround' , False ):
210 master
, slave
= pty
. openpty ()
211 width
= get_term_width ()
215 width_args
= [ '-w' , str ( width
)]
217 stdin
= subprocess
. PIPE
,
219 stderr
= self
._ err
_ file
)
221 self
._ output
_ process
= subprocess
. Popen (
222 [ 'bidiv' ] + width_args
, ** sp_kwargs
225 self
._ output
_ process
= subprocess
. Popen (
226 [ 'fribidi' , '-c' , 'UTF-8' ] + width_args
, ** sp_kwargs
)
227 self
._ output
_ channel
= os
. fdopen ( master
, 'rb' )
228 except OSError as ose
:
230 self
. report_warning ( 'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.' )
234 if ( sys
. version_info
>= ( 3 ,) and sys
. platform
!= 'win32' and
235 sys
. getfilesystemencoding () in [ 'ascii' , 'ANSI_X3.4-1968' ]
236 and not params
. get ( 'restrictfilenames' , False )):
237 # On Python 3, the Unicode filesystem API will throw errors (#1474)
239 'Assuming --restrict-filenames since file system encoding '
240 'cannot encode all characters. '
241 'Set the LC_ALL environment variable to fix this.' )
242 self
. params
[ 'restrictfilenames' ] = True
244 if ' %(stitle)s ' in self
. params
. get ( 'outtmpl' , '' ):
245 self
. report_warning ( ' %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.' )
250 self
. print_debug_header ()
251 self
. add_default_info_extractors ()
253 def add_info_extractor ( self
, ie
):
254 """Add an InfoExtractor object to the end of the list."""
256 self
._ ies
_ instances
[ ie
. ie_key ()] = ie
257 ie
. set_downloader ( self
)
259 def get_info_extractor ( self
, ie_key
):
261 Get an instance of an IE with name ie_key, it will try to get one from
262 the _ies list, if there's no instance it will create a new one and add
263 it to the extractor list.
265 ie
= self
._ ies
_ instances
. get ( ie_key
)
267 ie
= get_info_extractor ( ie_key
)()
268 self
. add_info_extractor ( ie
)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list.

    Each extractor produced by gen_extractors() is passed, in iteration
    order, to self.add_info_extractor().
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
278 def add_post_processor ( self
, pp
):
279 """Add a PostProcessor object to the end of the chain."""
281 pp
. set_downloader ( self
)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader).

    ph is appended to the end of self._progress_hooks; nothing is returned.
    """
    self._progress_hooks.append(ph)
287 def _bidi_workaround ( self
, message
):
288 if not hasattr ( self
, '_output_channel' ):
291 assert hasattr ( self
, '_output_process' )
292 assert isinstance ( message
, compat_str
)
293 line_count
= message
. count ( ' \n ' ) + 1
294 self
._ output
_ process
. stdin
. write (( message
+ ' \n ' ). encode ( 'utf-8' ))
295 self
._ output
_ process
. stdin
. flush ()
296 res
= '' . join ( self
._ output
_ channel
. readline (). decode ( 'utf-8' )
297 for _
in range ( line_count
))
298 return res
[:- len ( ' \n ' )]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    Thin wrapper around to_stdout() that always passes check_quiet=True.
    skip_eol is forwarded unchanged. Returns whatever to_stdout() returns.
    """
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s to out through the write_string helper.

    The 'encoding' entry of self.params (None when unset) is forwarded as
    the encoding argument of write_string.
    """
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout, or hand it to the configured logger.

    If self.params has a truthy 'logger', the message goes to its debug()
    method and nothing is written. Otherwise the message is written to
    self._screen_file, unless check_quiet is true and the 'quiet' param is
    set. A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    # The scraped source showed ' \n ' here; the padding around the escape
    # is an extraction artifact seen on every escape sequence in the file.
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
318 def to_stderr ( self
, message
):
319 """Print message to stderr."""
320 assert isinstance ( message
, compat_str
)
321 if self
. params
. get ( 'logger' ):
322 self
. params
[ 'logger' ]. error ( message
)
324 message
= self
._ bidi
_ workaround
( message
)
325 output
= message
+ ' \n '
326 self
._ write
_ string
( output
, self
._ err
_ file
)
328 def to_console_title ( self
, message
):
329 if not self
. params
. get ( 'consoletitle' , False ):
331 if os
. name
== 'nt' and ctypes
. windll
. kernel32
. GetConsoleWindow ():
332 # c_wchar_p() might not be necessary if `message` is
333 # already of type unicode()
334 ctypes
. windll
. kernel32
. SetConsoleTitleW ( ctypes
. c_wchar_p ( message
))
335 elif 'TERM' in os
. environ
:
336 self
._ write
_ string
( ' \033 ]0; %s \007 ' % message
, self
._ screen
_ file
)
338 def save_console_title ( self
):
339 if not self
. params
. get ( 'consoletitle' , False ):
341 if 'TERM' in os
. environ
:
342 # Save the title on stack
343 self
._ write
_ string
( ' \033 [22;0t' , self
._ screen
_ file
)
345 def restore_console_title ( self
):
346 if not self
. params
. get ( 'consoletitle' , False ):
348 if 'TERM' in os
. environ
:
349 # Restore the title from stack
350 self
._ write
_ string
( ' \033 [23;0t' , self
._ screen
_ file
)
353 self
. save_console_title ()
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The exception details passed in *args are ignored. The cookie jar is
    saved only when the 'cookiefile' param is present and not None.
    Returns None, so an exception raised in the with-body is not suppressed.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
362 def trouble ( self
, message
= None , tb
= None ):
363 """Determine action to take when a download problem appears.
365 Depending on if the downloader has been configured to ignore
366 download errors or not, this method may throw an exception or
367 not when errors are found, after printing the message.
369 tb, if given, is additional traceback information.
371 if message
is not None :
372 self
. to_stderr ( message
)
373 if self
. params
. get ( 'verbose' ):
375 if sys
. exc_info ()[ 0 ]: # if .trouble has been called from an except block
377 if hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
378 tb
+= '' . join ( traceback
. format_exception (* sys
. exc_info ()[ 1 ]. exc_info
))
379 tb
+= compat_str ( traceback
. format_exc ())
381 tb_data
= traceback
. format_list ( traceback
. extract_stack ())
382 tb
= '' . join ( tb_data
)
384 if not self
. params
. get ( 'ignoreerrors' , False ):
385 if sys
. exc_info ()[ 0 ] and hasattr ( sys
. exc_info ()[ 1 ], 'exc_info' ) and sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:
386 exc_info
= sys
. exc_info ()[ 1 ]. exc_info
388 exc_info
= sys
. exc_info ()
389 raise DownloadError ( message
, exc_info
)
390 self
._ download
_ retcode
= 1
392 def report_warning ( self
, message
):
394 Print the message to stderr, it will be prefixed with 'WARNING:'
395 If stderr is a tty file the 'WARNING:' will be colored
397 if self
. params
. get ( 'logger' ) is not None :
398 self
. params
[ 'logger' ]. warning ( message
)
400 if self
. params
. get ( 'no_warnings' ):
402 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
403 _msg_header
= ' \033 [0;33mWARNING: \033 [0m'
405 _msg_header
= 'WARNING:'
406 warning_message
= ' %s %s ' % ( _msg_header
, message
)
407 self
. to_stderr ( warning_message
)
409 def report_error ( self
, message
, tb
= None ):
411 Do the same as trouble, but prefixes the message with 'ERROR:', colored
412 in red if stderr is a tty file.
414 if self
._ err
_ file
. isatty () and os
. name
!= 'nt' :
415 _msg_header
= ' \033 [0;31mERROR: \033 [0m'
417 _msg_header
= 'ERROR:'
418 error_message
= ' %s %s ' % ( _msg_header
, message
)
419 self
. trouble ( error_message
, tb
)
421 def report_file_already_downloaded ( self
, file_name
):
422 """Report file has already been fully downloaded."""
424 self
. to_screen ( '[download] %s has already been downloaded' % file_name
)
425 except UnicodeEncodeError :
426 self
. to_screen ( '[download] The file has already been downloaded' )
428 def prepare_filename ( self
, info_dict
):
429 """Generate the output filename."""
431 template_dict
= dict ( info_dict
)
433 template_dict
[ 'epoch' ] = int ( time
. time ())
434 autonumber_size
= self
. params
. get ( 'autonumber_size' )
435 if autonumber_size
is None :
437 autonumber_templ
= ' %0 ' + str ( autonumber_size
) + 'd'
438 template_dict
[ 'autonumber' ] = autonumber_templ
% self
._ num
_ downloads
439 if template_dict
. get ( 'playlist_index' ) is not None :
440 template_dict
[ 'playlist_index' ] = ' %0 *d' % ( len ( str ( template_dict
[ 'n_entries' ])), template_dict
[ 'playlist_index' ])
441 if template_dict
. get ( 'resolution' ) is None :
442 if template_dict
. get ( 'width' ) and template_dict
. get ( 'height' ):
443 template_dict
[ 'resolution' ] = ' %dx%d ' % ( template_dict
[ 'width' ], template_dict
[ 'height' ])
444 elif template_dict
. get ( 'height' ):
445 template_dict
[ 'resolution' ] = ' %s p' % template_dict
[ 'height' ]
446 elif template_dict
. get ( 'width' ):
447 template_dict
[ 'resolution' ] = '?x %d ' % template_dict
[ 'width' ]
449 sanitize
= lambda k
, v
: sanitize_filename (
451 restricted
= self
. params
. get ( 'restrictfilenames' ),
453 template_dict
= dict (( k
, sanitize ( k
, v
))
454 for k
, v
in template_dict
. items ()
456 template_dict
= collections
. defaultdict ( lambda : 'NA' , template_dict
)
458 outtmpl
= self
. params
. get ( 'outtmpl' , DEFAULT_OUTTMPL
)
459 tmpl
= compat_expanduser ( outtmpl
)
460 filename
= tmpl
% template_dict
462 except ValueError as err
:
463 self
. report_error ( 'Error in output template: ' + str ( err
) + ' (encoding: ' + repr ( preferredencoding ()) + ')' )
466 def _match_entry ( self
, info_dict
):
467 """ Returns None iff the file should be downloaded """
469 video_title
= info_dict
. get ( 'title' , info_dict
. get ( 'id' , 'video' ))
470 if 'title' in info_dict
:
471 # This can happen when we're just evaluating the playlist
472 title
= info_dict
[ 'title' ]
473 matchtitle
= self
. params
. get ( 'matchtitle' , False )
475 if not re
. search ( matchtitle
, title
, re
. IGNORECASE
):
476 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
477 rejecttitle
= self
. params
. get ( 'rejecttitle' , False )
479 if re
. search ( rejecttitle
, title
, re
. IGNORECASE
):
480 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
481 date
= info_dict
. get ( 'upload_date' , None )
483 dateRange
= self
. params
. get ( 'daterange' , DateRange ())
484 if date
not in dateRange
:
485 return ' %s upload date is not in range %s ' % ( date_from_str ( date
). isoformat (), dateRange
)
486 view_count
= info_dict
. get ( 'view_count' , None )
487 if view_count
is not None :
488 min_views
= self
. params
. get ( 'min_views' )
489 if min_views
is not None and view_count
< min_views
:
490 return 'Skipping %s , because it has not reached minimum view count ( %d / %d )' % ( video_title
, view_count
, min_views
)
491 max_views
= self
. params
. get ( 'max_views' )
492 if max_views
is not None and view_count
> max_views
:
493 return 'Skipping %s , because it has exceeded the maximum view count ( %d / %d )' % ( video_title
, view_count
, max_views
)
494 age_limit
= self
. params
. get ( 'age_limit' )
495 if age_limit
is not None :
496 actual_age_limit
= info_dict
. get ( 'age_limit' )
497 if actual_age_limit
is None :
499 if age_limit
< actual_age_limit
:
500 return 'Skipping "' + title
+ '" because it is age restricted'
501 if self
. in_download_archive ( info_dict
):
502 return ' %s has already been recorded in archive' % video_title
# NOTE(review): call sites use self.add_extra_info(info, extra), so this is
# presumably bound as a staticmethod; the decorator line is not visible in
# this view -- confirm before relying on it.
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing.

    info_dict is modified in place; keys it already holds keep their value.
    Nothing is returned.
    '''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
511 def extract_info ( self
, url
, download
= True , ie_key
= None , extra_info
={},
514 Returns a list with a dictionary for each video we find.
515 If 'download', also downloads the videos.
516 extra_info is a dict containing the extra values to add to each result
520 ies
= [ self
. get_info_extractor ( ie_key
)]
525 if not ie
. suitable ( url
):
529 self
. report_warning ( 'The program functionality for this site has been marked as broken, '
530 'and will probably not work.' )
533 ie_result
= ie
. extract ( url
)
534 if ie_result
is None : # Finished already (backwards compatibility; listformats and friends should be moved here)
536 if isinstance ( ie_result
, list ):
537 # Backwards compatibility: old IE result format
539 '_type' : 'compat_list' ,
540 'entries' : ie_result
,
542 self
. add_default_extra_info ( ie_result
, ie
, url
)
544 return self
. process_ie_result ( ie_result
, download
, extra_info
)
547 except ExtractorError
as de
: # An error we somewhat expected
548 self
. report_error ( compat_str ( de
), de
. format_traceback ())
550 except MaxDownloadsReached
:
552 except Exception as e
:
553 if self
. params
. get ( 'ignoreerrors' , False ):
554 self
. report_error ( compat_str ( e
), tb
= compat_str ( traceback
. format_exc ()))
559 self
. report_error ( 'no suitable InfoExtractor for URL %s ' % url
)
561 def add_default_extra_info ( self
, ie_result
, ie
, url
):
562 self
. add_extra_info ( ie_result
, {
563 'extractor' : ie
. IE_NAME
,
565 'webpage_url_basename' : url_basename ( url
),
566 'extractor_key' : ie
. ie_key (),
569 def process_ie_result ( self
, ie_result
, download
= True , extra_info
={}):
571 Take the result of the ie(may be modified) and resolve all unresolved
572 references (URLs, playlist items).
574 It will also download the videos if 'download'.
575 Returns the resolved ie_result.
578 result_type
= ie_result
. get ( '_type' , 'video' )
580 if result_type
in ( 'url' , 'url_transparent' ):
581 extract_flat
= self
. params
. get ( 'extract_flat' , False )
582 if (( extract_flat
== 'in_playlist' and 'playlist' in extra_info
) or
583 extract_flat
is True ):
584 if self
. params
. get ( 'forcejson' , False ):
585 self
. to_stdout ( json
. dumps ( ie_result
))
588 if result_type
== 'video' :
589 self
. add_extra_info ( ie_result
, extra_info
)
590 return self
. process_video_result ( ie_result
, download
= download
)
591 elif result_type
== 'url' :
592 # We have to add extra_info to the results because it may be
593 # contained in a playlist
594 return self
. extract_info ( ie_result
[ 'url' ],
596 ie_key
= ie_result
. get ( 'ie_key' ),
597 extra_info
= extra_info
)
598 elif result_type
== 'url_transparent' :
599 # Use the information from the embedding page
600 info
= self
. extract_info (
601 ie_result
[ 'url' ], ie_key
= ie_result
. get ( 'ie_key' ),
602 extra_info
= extra_info
, download
= False , process
= False )
604 def make_result ( embedded_info
):
605 new_result
= ie_result
. copy ()
606 for f
in ( '_type' , 'url' , 'ext' , 'player_url' , 'formats' ,
607 'entries' , 'ie_key' , 'duration' ,
608 'subtitles' , 'annotations' , 'format' ,
609 'thumbnail' , 'thumbnails' ):
612 if f
in embedded_info
:
613 new_result
[ f
] = embedded_info
[ f
]
615 new_result
= make_result ( info
)
617 assert new_result
. get ( '_type' ) != 'url_transparent'
618 if new_result
. get ( '_type' ) == 'compat_list' :
619 new_result
[ 'entries' ] = [
620 make_result ( e
) for e
in new_result
[ 'entries' ]]
622 return self
. process_ie_result (
623 new_result
, download
= download
, extra_info
= extra_info
)
624 elif result_type
== 'playlist' :
625 # We process each entry in the playlist
626 playlist
= ie_result
. get ( 'title' , None ) or ie_result
. get ( 'id' , None )
627 self
. to_screen ( '[download] Downloading playlist: %s ' % playlist
)
629 playlist_results
= []
631 playliststart
= self
. params
. get ( 'playliststart' , 1 ) - 1
632 playlistend
= self
. params
. get ( 'playlistend' , None )
633 # For backwards compatibility, interpret -1 as whole list
634 if playlistend
== - 1 :
637 if isinstance ( ie_result
[ 'entries' ], list ):
638 n_all_entries
= len ( ie_result
[ 'entries' ])
639 entries
= ie_result
[ 'entries' ][ playliststart
: playlistend
]
640 n_entries
= len ( entries
)
642 "[ %s ] playlist %s : Collected %d video ids (downloading %d of them)" %
643 ( ie_result
[ 'extractor' ], playlist
, n_all_entries
, n_entries
))
645 assert isinstance ( ie_result
[ 'entries' ], PagedList
)
646 entries
= ie_result
[ 'entries' ]. getslice (
647 playliststart
, playlistend
)
648 n_entries
= len ( entries
)
650 "[ %s ] playlist %s : Downloading %d videos" %
651 ( ie_result
[ 'extractor' ], playlist
, n_entries
))
653 for i
, entry
in enumerate ( entries
, 1 ):
654 self
. to_screen ( '[download] Downloading video # %s of %s ' % ( i
, n_entries
))
656 'n_entries' : n_entries
,
657 'playlist' : playlist
,
658 'playlist_index' : i
+ playliststart
,
659 'extractor' : ie_result
[ 'extractor' ],
660 'webpage_url' : ie_result
[ 'webpage_url' ],
661 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
662 'extractor_key' : ie_result
[ 'extractor_key' ],
665 reason
= self
._ match
_ entry
( entry
)
666 if reason
is not None :
667 self
. to_screen ( '[download] ' + reason
)
670 entry_result
= self
. process_ie_result ( entry
,
673 playlist_results
. append ( entry_result
)
674 ie_result
[ 'entries' ] = playlist_results
676 elif result_type
== 'compat_list' :
678 self
. add_extra_info ( r
,
680 'extractor' : ie_result
[ 'extractor' ],
681 'webpage_url' : ie_result
[ 'webpage_url' ],
682 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
683 'extractor_key' : ie_result
[ 'extractor_key' ],
686 ie_result
[ 'entries' ] = [
687 self
. process_ie_result ( _fixup ( r
), download
, extra_info
)
688 for r
in ie_result
[ 'entries' ]
692 raise Exception ( 'Invalid result type: %s ' % result_type
)
694 def select_format ( self
, format_spec
, available_formats
):
695 if format_spec
== 'best' or format_spec
is None :
696 return available_formats
[- 1 ]
697 elif format_spec
== 'worst' :
698 return available_formats
[ 0 ]
699 elif format_spec
== 'bestaudio' :
701 f
for f
in available_formats
702 if f
. get ( 'vcodec' ) == 'none' ]
704 return audio_formats
[- 1 ]
705 elif format_spec
== 'worstaudio' :
707 f
for f
in available_formats
708 if f
. get ( 'vcodec' ) == 'none' ]
710 return audio_formats
[ 0 ]
711 elif format_spec
== 'bestvideo' :
713 f
for f
in available_formats
714 if f
. get ( 'acodec' ) == 'none' ]
716 return video_formats
[- 1 ]
717 elif format_spec
== 'worstvideo' :
719 f
for f
in available_formats
720 if f
. get ( 'acodec' ) == 'none' ]
722 return video_formats
[ 0 ]
724 extensions
= [ 'mp4' , 'flv' , 'webm' , '3gp' , 'm4a' ]
725 if format_spec
in extensions
:
726 filter_f
= lambda f
: f
[ 'ext' ] == format_spec
728 filter_f
= lambda f
: f
[ 'format_id' ] == format_spec
729 matches
= list ( filter ( filter_f
, available_formats
))
734 def process_video_result ( self
, info_dict
, download
= True ):
735 assert info_dict
. get ( '_type' , 'video' ) == 'video'
737 if 'id' not in info_dict
:
738 raise ExtractorError ( 'Missing "id" field in extractor result' )
739 if 'title' not in info_dict
:
740 raise ExtractorError ( 'Missing "title" field in extractor result' )
742 if 'playlist' not in info_dict
:
743 # It isn't part of a playlist
744 info_dict
[ 'playlist' ] = None
745 info_dict
[ 'playlist_index' ] = None
747 thumbnails
= info_dict
. get ( 'thumbnails' )
749 thumbnails
. sort ( key
= lambda t
: (
750 t
. get ( 'width' ), t
. get ( 'height' ), t
. get ( 'url' )))
752 if 'width' in t
and 'height' in t
:
753 t
[ 'resolution' ] = ' %dx%d ' % ( t
[ 'width' ], t
[ 'height' ])
755 if thumbnails
and 'thumbnail' not in info_dict
:
756 info_dict
[ 'thumbnail' ] = thumbnails
[- 1 ][ 'url' ]
758 if 'display_id' not in info_dict
and 'id' in info_dict
:
759 info_dict
[ 'display_id' ] = info_dict
[ 'id' ]
761 if info_dict
. get ( 'upload_date' ) is None and info_dict
. get ( 'timestamp' ) is not None :
762 upload_date
= datetime
. datetime
. utcfromtimestamp (
763 info_dict
[ 'timestamp' ])
764 info_dict
[ 'upload_date' ] = upload_date
. strftime ( '%Y%m %d ' )
766 # These extractors handle format selection themselves
767 if info_dict
[ 'extractor' ] in [ 'Youku' ]:
769 self
. process_info ( info_dict
)
772 # We now pick which formats have to be downloaded
773 if info_dict
. get ( 'formats' ) is None :
774 # There's only one format available
775 formats
= [ info_dict
]
777 formats
= info_dict
[ 'formats' ]
780 raise ExtractorError ( 'No video formats found!' )
782 # We check that all the formats have the format and format_id fields
783 for i
, format
in enumerate ( formats
):
784 if 'url' not in format
:
785 raise ExtractorError ( 'Missing "url" key in result (index %d )' % i
)
787 if format
. get ( 'format_id' ) is None :
788 format
[ 'format_id' ] = compat_str ( i
)
789 if format
. get ( 'format' ) is None :
790 format
[ 'format' ] = ' {id} - {res}{note} ' . format (
791 id = format
[ 'format_id' ],
792 res
= self
. format_resolution ( format
),
793 note
= ' ( {0} )' . format ( format
[ 'format_note' ]) if format
. get ( 'format_note' ) is not None else '' ,
795 # Automatically determine file extension if missing
796 if 'ext' not in format
:
797 format
[ 'ext' ] = determine_ext ( format
[ 'url' ]). lower ()
799 format_limit
= self
. params
. get ( 'format_limit' , None )
801 formats
= list ( takewhile_inclusive (
802 lambda f
: f
[ 'format_id' ] != format_limit
, formats
805 # TODO Central sorting goes here
807 if formats
[ 0 ] is not info_dict
:
808 # only set the 'formats' fields if the original info_dict list them
809 # otherwise we end up with a circular reference, the first (and unique)
810 # element in the 'formats' field in info_dict is info_dict itself,
811 # which can't be exported to JSON
812 info_dict
[ 'formats' ] = formats
813 if self
. params
. get ( 'listformats' , None ):
814 self
. list_formats ( info_dict
)
817 req_format
= self
. params
. get ( 'format' )
818 if req_format
is None :
820 formats_to_download
= []
821 # The -1 is for supporting YoutubeIE
822 if req_format
in ( '-1' , 'all' ):
823 formats_to_download
= formats
825 for rfstr
in req_format
. split ( ',' ):
826 # We can accept formats requested in the format: 34/5/best, we pick
827 # the first that is available, starting from left
828 req_formats
= rfstr
. split ( '/' )
829 for rf
in req_formats
:
830 if re
. match ( r
'.+?\+.+?' , rf
) is not None :
831 # Two formats have been requested like '137+139'
832 format_1
, format_2
= rf
. split ( '+' )
833 formats_info
= ( self
. select_format ( format_1
, formats
),
834 self
. select_format ( format_2
, formats
))
835 if all ( formats_info
):
837 'requested_formats' : formats_info
,
839 'ext' : formats_info
[ 0 ][ 'ext' ],
842 selected_format
= None
844 selected_format
= self
. select_format ( rf
, formats
)
845 if selected_format
is not None :
846 formats_to_download
. append ( selected_format
)
848 if not formats_to_download
:
849 raise ExtractorError ( 'requested format not available' ,
853 if len ( formats_to_download
) > 1 :
854 self
. to_screen ( '[info] %s : downloading video in %s formats' % ( info_dict
[ 'id' ], len ( formats_to_download
)))
855 for format
in formats_to_download
:
856 new_info
= dict ( info_dict
)
857 new_info
. update ( format
)
858 self
. process_info ( new_info
)
859 # We update the info dict with the best quality format (backwards compatibility)
860 info_dict
. update ( formats_to_download
[- 1 ])
863 def process_info ( self
, info_dict
):
864 """Process a single resolved IE result."""
866 assert info_dict
. get ( '_type' , 'video' ) == 'video'
868 max_downloads
= self
. params
. get ( 'max_downloads' )
869 if max_downloads
is not None :
870 if self
._ num
_ downloads
>= int ( max_downloads
):
871 raise MaxDownloadsReached ()
873 info_dict
[ 'fulltitle' ] = info_dict
[ 'title' ]
874 if len ( info_dict
[ 'title' ]) > 200 :
875 info_dict
[ 'title' ] = info_dict
[ 'title' ][: 197 ] + '...'
877 # Keep for backwards compatibility
878 info_dict
[ 'stitle' ] = info_dict
[ 'title' ]
880 if 'format' not in info_dict
:
881 info_dict
[ 'format' ] = info_dict
[ 'ext' ]
883 reason
= self
._ match
_ entry
( info_dict
)
884 if reason
is not None :
885 self
. to_screen ( '[download] ' + reason
)
888 self
._ num
_ downloads
+= 1
890 filename
= self
. prepare_filename ( info_dict
)
893 if self
. params
. get ( 'forcetitle' , False ):
894 self
. to_stdout ( info_dict
[ 'fulltitle' ])
895 if self
. params
. get ( 'forceid' , False ):
896 self
. to_stdout ( info_dict
[ 'id' ])
897 if self
. params
. get ( 'forceurl' , False ):
898 # For RTMP URLs, also include the playpath
899 self
. to_stdout ( info_dict
[ 'url' ] + info_dict
. get ( 'play_path' , '' ))
900 if self
. params
. get ( 'forcethumbnail' , False ) and info_dict
. get ( 'thumbnail' ) is not None :
901 self
. to_stdout ( info_dict
[ 'thumbnail' ])
902 if self
. params
. get ( 'forcedescription' , False ) and info_dict
. get ( 'description' ) is not None :
903 self
. to_stdout ( info_dict
[ 'description' ])
904 if self
. params
. get ( 'forcefilename' , False ) and filename
is not None :
905 self
. to_stdout ( filename
)
906 if self
. params
. get ( 'forceduration' , False ) and info_dict
. get ( 'duration' ) is not None :
907 self
. to_stdout ( formatSeconds ( info_dict
[ 'duration' ]))
908 if self
. params
. get ( 'forceformat' , False ):
909 self
. to_stdout ( info_dict
[ 'format' ])
910 if self
. params
. get ( 'forcejson' , False ):
911 info_dict
[ '_filename' ] = filename
912 self
. to_stdout ( json
. dumps ( info_dict
))
913 if self
. params
. get ( 'dump_single_json' , False ):
914 info_dict
[ '_filename' ] = filename
916 # Do nothing else if in simulate mode
917 if self
. params
. get ( 'simulate' , False ):
924 dn
= os
. path
. dirname ( encodeFilename ( filename
))
925 if dn
and not os
. path
. exists ( dn
):
927 except ( OSError , IOError ) as err
:
928 self
. report_error ( 'unable to create directory ' + compat_str ( err
))
931 if self
. params
. get ( 'writedescription' , False ):
932 descfn
= filename
+ '.description'
933 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( descfn
)):
934 self
. to_screen ( '[info] Video description is already present' )
937 self
. to_screen ( '[info] Writing video description to: ' + descfn
)
938 with io
. open ( encodeFilename ( descfn
), 'w' , encoding
= 'utf-8' ) as descfile
:
939 descfile
. write ( info_dict
[ 'description' ])
940 except ( KeyError , TypeError ):
941 self
. report_warning ( 'There \' s no description to write.' )
942 except ( OSError , IOError ):
943 self
. report_error ( 'Cannot write description file ' + descfn
)
946 if self
. params
. get ( 'writeannotations' , False ):
947 annofn
= filename
+ '.annotations.xml'
948 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( annofn
)):
949 self
. to_screen ( '[info] Video annotations are already present' )
952 self
. to_screen ( '[info] Writing video annotations to: ' + annofn
)
953 with io
. open ( encodeFilename ( annofn
), 'w' , encoding
= 'utf-8' ) as annofile
:
954 annofile
. write ( info_dict
[ 'annotations' ])
955 except ( KeyError , TypeError ):
956 self
. report_warning ( 'There are no annotations to write.' )
957 except ( OSError , IOError ):
958 self
. report_error ( 'Cannot write annotations file: ' + annofn
)
961 subtitles_are_requested
= any ([ self
. params
. get ( 'writesubtitles' , False ),
962 self
. params
. get ( 'writeautomaticsub' )])
964 if subtitles_are_requested
and 'subtitles' in info_dict
and info_dict
[ 'subtitles' ]:
965 # subtitles download errors are already managed as troubles in relevant IE
966 # that way it will silently go on when used with unsupporting IE
967 subtitles
= info_dict
[ 'subtitles' ]
968 sub_format
= self
. params
. get ( 'subtitlesformat' , 'srt' )
969 for sub_lang
in subtitles
. keys ():
970 sub
= subtitles
[ sub_lang
]
974 sub_filename
= subtitles_filename ( filename
, sub_lang
, sub_format
)
975 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( sub_filename
)):
976 self
. to_screen ( '[info] Video subtitle %s . %s is already_present' % ( sub_lang
, sub_format
))
978 self
. to_screen ( '[info] Writing video subtitles to: ' + sub_filename
)
979 with io
. open ( encodeFilename ( sub_filename
), 'w' , encoding
= 'utf-8' ) as subfile
:
981 except ( OSError , IOError ):
982 self
. report_error ( 'Cannot write subtitles file ' + sub_filename
)
985 if self
. params
. get ( 'writeinfojson' , False ):
986 infofn
= os
. path
. splitext ( filename
)[ 0 ] + '.info.json'
987 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( infofn
)):
988 self
. to_screen ( '[info] Video description metadata is already present' )
990 self
. to_screen ( '[info] Writing video description metadata as JSON to: ' + infofn
)
992 write_json_file ( info_dict
, encodeFilename ( infofn
))
993 except ( OSError , IOError ):
994 self
. report_error ( 'Cannot write metadata to JSON file ' + infofn
)
997 if self
. params
. get ( 'writethumbnail' , False ):
998 if info_dict
. get ( 'thumbnail' ) is not None :
999 thumb_format
= determine_ext ( info_dict
[ 'thumbnail' ], 'jpg' )
1000 thumb_filename
= os
. path
. splitext ( filename
)[ 0 ] + '.' + thumb_format
1001 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( thumb_filename
)):
1002 self
. to_screen ( '[ %s ] %s : Thumbnail is already present' %
1003 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
1005 self
. to_screen ( '[ %s ] %s : Downloading thumbnail ...' %
1006 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
1008 uf
= self
. urlopen ( info_dict
[ 'thumbnail' ])
1009 with open ( thumb_filename
, 'wb' ) as thumbf
:
1010 shutil
. copyfileobj ( uf
, thumbf
)
1011 self
. to_screen ( '[ %s ] %s : Writing thumbnail to: %s ' %
1012 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ], thumb_filename
))
1013 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
1014 self
. report_warning ( 'Unable to download thumbnail " %s ": %s ' %
1015 ( info_dict
[ 'thumbnail' ], compat_str ( err
)))
1017 if not self
. params
. get ( 'skip_download' , False ):
1018 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( filename
)):
1023 fd
= get_suitable_downloader ( info
)( self
, self
. params
)
1024 for ph
in self
._ progress
_ hooks
:
1025 fd
. add_progress_hook ( ph
)
1026 if self
. params
. get ( 'verbose' ):
1027 self
. to_stdout ( '[debug] Invoking downloader on %r ' % info
. get ( 'url' ))
1028 return fd
. download ( name
, info
)
1029 if info_dict
. get ( 'requested_formats' ) is not None :
1032 merger
= FFmpegMergerPP ( self
, not self
. params
. get ( 'keepvideo' ))
1033 if not merger
._ executable
:
1035 self
. report_warning ( 'You have requested multiple '
1036 'formats but ffmpeg or avconv are not installed.'
1037 ' The formats won \' t be merged' )
1039 postprocessors
= [ merger
]
1040 for f
in info_dict
[ 'requested_formats' ]:
1041 new_info
= dict ( info_dict
)
1043 fname
= self
. prepare_filename ( new_info
)
1044 fname
= prepend_extension ( fname
, 'f %s ' % f
[ 'format_id' ])
1045 downloaded
. append ( fname
)
1046 partial_success
= dl ( fname
, new_info
)
1047 success
= success
and partial_success
1048 info_dict
[ '__postprocessors' ] = postprocessors
1049 info_dict
[ '__files_to_merge' ] = downloaded
1051 # Just a single file
1052 success
= dl ( filename
, info_dict
)
1053 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
1054 self
. report_error ( 'unable to download video data: %s ' % str ( err
))
1056 except ( OSError , IOError ) as err
:
1057 raise UnavailableVideoError ( err
)
1058 except ( ContentTooShortError
, ) as err
:
1059 self
. report_error ( 'content too short (expected %s bytes and served %s )' % ( err
. expected
, err
. downloaded
))
1064 self
. post_process ( filename
, info_dict
)
1065 except ( PostProcessingError
) as err
:
1066 self
. report_error ( 'postprocessing: %s ' % str ( err
))
1069 self
. record_download_archive ( info_dict
)
1071 def download ( self
, url_list
):
1072 """Download a given list of URLs."""
1073 outtmpl
= self
. params
. get ( 'outtmpl' , DEFAULT_OUTTMPL
)
1074 if ( len ( url_list
) > 1 and
1076 and self
. params
. get ( 'max_downloads' ) != 1 ):
1077 raise SameFileError ( outtmpl
)
1079 for url
in url_list
:
1081 #It also downloads the videos
1082 res
= self
. extract_info ( url
)
1083 except UnavailableVideoError
:
1084 self
. report_error ( 'unable to download video' )
1085 except MaxDownloadsReached
:
1086 self
. to_screen ( '[info] Maximum number of downloaded files reached.' )
1089 if self
. params
. get ( 'dump_single_json' , False ):
1090 self
. to_stdout ( json
. dumps ( res
))
1092 return self
._ download
_ retcode
1094 def download_with_info_file ( self
, info_filename
):
1095 with io
. open ( info_filename
, 'r' , encoding
= 'utf-8' ) as f
:
1098 self
. process_ie_result ( info
, download
= True )
1099 except DownloadError
:
1100 webpage_url
= info
. get ( 'webpage_url' )
1101 if webpage_url
is not None :
1102 self
. report_warning ( 'The info failed to download, trying with " %s "' % webpage_url
)
1103 return self
. download ([ webpage_url
])
1106 return self
._ download
_ retcode
1108 def post_process ( self
, filename
, ie_info
):
1109 """Run all the postprocessors on the given file."""
1110 info
= dict ( ie_info
)
1111 info
[ 'filepath' ] = filename
1114 if ie_info
. get ( '__postprocessors' ) is not None :
1115 pps_chain
. extend ( ie_info
[ '__postprocessors' ])
1116 pps_chain
. extend ( self
._ pps
)
1117 for pp
in pps_chain
:
1119 keep_video_wish
, new_info
= pp
. run ( info
)
1120 if keep_video_wish
is not None :
1122 keep_video
= keep_video_wish
1123 elif keep_video
is None :
1124 # No clear decision yet, let IE decide
1125 keep_video
= keep_video_wish
1126 except PostProcessingError
as e
:
1127 self
. report_error ( e
. msg
)
1128 if keep_video
is False and not self
. params
. get ( 'keepvideo' , False ):
1130 self
. to_screen ( 'Deleting original file %s (pass -k to keep)' % filename
)
1131 os
. remove ( encodeFilename ( filename
))
1132 except ( IOError , OSError ):
1133 self
. report_warning ( 'Unable to remove downloaded video file' )
1135 def _make_archive_id ( self
, info_dict
):
1136 # Future-proof against any change in case
1137 # and backwards compatibility with prior versions
1138 extractor
= info_dict
. get ( 'extractor_key' )
1139 if extractor
is None :
1140 if 'id' in info_dict
:
1141 extractor
= info_dict
. get ( 'ie_key' ) # key in a playlist
1142 if extractor
is None :
1143 return None # Incomplete video information
1144 return extractor
. lower () + ' ' + info_dict
[ 'id' ]
1146 def in_download_archive ( self
, info_dict
):
1147 fn
= self
. params
. get ( 'download_archive' )
1151 vid_id
= self
._ make
_ archive
_ id
( info_dict
)
1153 return False # Incomplete video information
1156 with locked_file ( fn
, 'r' , encoding
= 'utf-8' ) as archive_file
:
1157 for line
in archive_file
:
1158 if line
. strip () == vid_id
:
1160 except IOError as ioe
:
1161 if ioe
. errno
!= errno
. ENOENT
:
1165 def record_download_archive ( self
, info_dict
):
1166 fn
= self
. params
. get ( 'download_archive' )
1169 vid_id
= self
._ make
_ archive
_ id
( info_dict
)
1171 with locked_file ( fn
, 'a' , encoding
= 'utf-8' ) as archive_file
:
1172 archive_file
. write ( vid_id
+ ' \n ' )
1175 def format_resolution ( format
, default
= 'unknown' ):
1176 if format
. get ( 'vcodec' ) == 'none' :
1178 if format
. get ( 'resolution' ) is not None :
1179 return format
[ 'resolution' ]
1180 if format
. get ( 'height' ) is not None :
1181 if format
. get ( 'width' ) is not None :
1182 res
= ' %sx%s ' % ( format
[ 'width' ], format
[ 'height' ])
1184 res
= ' %s p' % format
[ 'height' ]
1185 elif format
. get ( 'width' ) is not None :
1186 res
= '?x %d ' % format
[ 'width' ]
1191 def _format_note ( self
, fdict
):
1193 if fdict
. get ( 'ext' ) in [ 'f4f' , 'f4m' ]:
1194 res
+= '(unsupported) '
1195 if fdict
. get ( 'format_note' ) is not None :
1196 res
+= fdict
[ 'format_note' ] + ' '
1197 if fdict
. get ( 'tbr' ) is not None :
1198 res
+= '%4dk ' % fdict
[ 'tbr' ]
1199 if fdict
. get ( 'container' ) is not None :
1202 res
+= ' %s container' % fdict
[ 'container' ]
1203 if ( fdict
. get ( 'vcodec' ) is not None and
1204 fdict
. get ( 'vcodec' ) != 'none' ):
1207 res
+= fdict
[ 'vcodec' ]
1208 if fdict
. get ( 'vbr' ) is not None :
1210 elif fdict
. get ( 'vbr' ) is not None and fdict
. get ( 'abr' ) is not None :
1212 if fdict
. get ( 'vbr' ) is not None :
1213 res
+= '%4dk' % fdict
[ 'vbr' ]
1214 if fdict
. get ( 'fps' ) is not None :
1215 res
+= ', %sf ps' % fdict
[ 'fps' ]
1216 if fdict
. get ( 'acodec' ) is not None :
1219 if fdict
[ 'acodec' ] == 'none' :
1222 res
+= ' %- 5s' % fdict
[ 'acodec' ]
1223 elif fdict
. get ( 'abr' ) is not None :
1227 if fdict
. get ( 'abr' ) is not None :
1228 res
+= '@%3dk' % fdict
[ 'abr' ]
1229 if fdict
. get ( 'asr' ) is not None :
1230 res
+= ' (%5dHz)' % fdict
[ 'asr' ]
1231 if fdict
. get ( 'filesize' ) is not None :
1234 res
+= format_bytes ( fdict
[ 'filesize' ])
1235 elif fdict
. get ( 'filesize_approx' ) is not None :
1238 res
+= '~' + format_bytes ( fdict
[ 'filesize_approx' ])
1241 def list_formats ( self
, info_dict
):
1242 def line ( format
, idlen
= 20 ):
1243 return (( ' %- ' + compat_str ( idlen
+ 1 ) + 's %- 10s %- 12s %s ' ) % (
1244 format
[ 'format_id' ],
1246 self
. format_resolution ( format
),
1247 self
._ format
_ note
( format
),
1250 formats
= info_dict
. get ( 'formats' , [ info_dict
])
1251 idlen
= max ( len ( 'format code' ),
1252 max ( len ( f
[ 'format_id' ]) for f
in formats
))
1253 formats_s
= [ line ( f
, idlen
) for f
in formats
]
1254 if len ( formats
) > 1 :
1255 formats_s
[ 0 ] += ( ' ' if self
._ format
_ note
( formats
[ 0 ]) else '' ) + '(worst)'
1256 formats_s
[- 1 ] += ( ' ' if self
._ format
_ note
( formats
[- 1 ]) else '' ) + '(best)'
1258 header_line
= line ({
1259 'format_id' : 'format code' , 'ext' : 'extension' ,
1260 'resolution' : 'resolution' , 'format_note' : 'note' }, idlen
= idlen
)
1261 self
. to_screen ( '[info] Available formats for %s : \n %s \n %s ' %
1262 ( info_dict
[ 'id' ], header_line
, ' \n ' . join ( formats_s
)))
1264 def urlopen ( self
, req
):
1265 """ Start an HTTP download """
1267 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1268 # always respected by websites, some tend to give out URLs with non percent-encoded
1269 # non-ASCII characters (see telemb.py, ard.py [#3412])
1270 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1271 # To work around aforementioned issue we will replace request's original URL with
1272 # percent-encoded one
1273 req_is_string
= isinstance ( req
, basestring
if sys
. version_info
< ( 3 , 0 ) else compat_str
)
1274 url
= req
if req_is_string
else req
. get_full_url ()
1275 url_escaped
= escape_url ( url
)
1277 # Substitute URL if any change after escaping
1278 if url
!= url_escaped
:
1282 req
= compat_urllib_request
. Request (
1283 url_escaped
, data
= req
. data
, headers
= req
. headers
,
1284 origin_req_host
= req
. origin_req_host
, unverifiable
= req
. unverifiable
)
1286 return self
._ opener
. open ( req
, timeout
= self
._ socket
_ timeout
)
1288 def print_debug_header ( self
):
1289 if not self
. params
. get ( 'verbose' ):
1292 if type ( '' ) is not compat_str
:
1293 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1294 self
. report_warning (
1295 'Your Python is broken! Update to a newer and supported version' )
1298 '[debug] Encodings: locale %s , fs %s , out %s , pref %s \n ' % (
1299 locale
. getpreferredencoding (),
1300 sys
. getfilesystemencoding (),
1301 sys
. stdout
. encoding
,
1302 self
. get_encoding ()))
1303 write_string ( encoding_str
, encoding
= None )
1305 self
._ write
_ string
( '[debug] youtube-dl version ' + __version__
+ ' \n ' )
1307 sp
= subprocess
. Popen (
1308 [ 'git' , 'rev-parse' , '--short' , 'HEAD' ],
1309 stdout
= subprocess
. PIPE
, stderr
= subprocess
. PIPE
,
1310 cwd
= os
. path
. dirname ( os
. path
. abspath ( __file__
)))
1311 out
, err
= sp
. communicate ()
1312 out
= out
. decode (). strip ()
1313 if re
. match ( '[0-9a-f]+' , out
):
1314 self
._ write
_ string
( '[debug] Git HEAD: ' + out
+ ' \n ' )
1320 self
._ write
_ string
( '[debug] Python version %s - %s \n ' % (
1321 platform
. python_version (), platform_name ()))
1323 exe_versions
= FFmpegPostProcessor
. get_versions ()
1324 exe_str
= ', ' . join (
1326 for exe
, v
in sorted ( exe_versions
. items ())
1331 self
._ write
_ string
( '[debug] exe versions: %s \n ' % exe_str
)
1334 for handler
in self
._ opener
. handlers
:
1335 if hasattr ( handler
, 'proxies' ):
1336 proxy_map
. update ( handler
. proxies
)
1337 self
._ write
_ string
( '[debug] Proxy map: ' + compat_str ( proxy_map
) + ' \n ' )
1339 def _setup_opener ( self
):
1340 timeout_val
= self
. params
. get ( 'socket_timeout' )
1341 self
._ socket
_ timeout
= 600 if timeout_val
is None else float ( timeout_val
)
1343 opts_cookiefile
= self
. params
. get ( 'cookiefile' )
1344 opts_proxy
= self
. params
. get ( 'proxy' )
1346 if opts_cookiefile
is None :
1347 self
. cookiejar
= compat_cookiejar
. CookieJar ()
1349 self
. cookiejar
= compat_cookiejar
. MozillaCookieJar (
1351 if os
. access ( opts_cookiefile
, os
. R_OK
):
1352 self
. cookiejar
. load ()
1354 cookie_processor
= compat_urllib_request
. HTTPCookieProcessor (
1356 if opts_proxy
is not None :
1357 if opts_proxy
== '' :
1360 proxies
= { 'http' : opts_proxy
, 'https' : opts_proxy
}
1362 proxies
= compat_urllib_request
. getproxies ()
1363 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1364 if 'http' in proxies
and 'https' not in proxies
:
1365 proxies
[ 'https' ] = proxies
[ 'http' ]
1366 proxy_handler
= compat_urllib_request
. ProxyHandler ( proxies
)
1368 debuglevel
= 1 if self
. params
. get ( 'debug_printtraffic' ) else 0
1369 https_handler
= make_HTTPS_handler (
1370 self
. params
. get ( 'nocheckcertificate' , False ), debuglevel
= debuglevel
)
1371 ydlh
= YoutubeDLHandler ( debuglevel
= debuglevel
)
1372 opener
= compat_urllib_request
. build_opener (
1373 https_handler
, proxy_handler
, cookie_processor
, ydlh
)
1374 # Delete the default user-agent header, which would otherwise apply in
1375 # cases where our custom HTTP handler doesn't come into play
1376 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1377 opener
. addheaders
= []
1378 self
._ opener
= opener
1380 def encode ( self
, s
):
1381 if isinstance ( s
, bytes ):
1382 return s
# Already encoded
1385 return s
. encode ( self
. get_encoding ())
1386 except UnicodeEncodeError as err
:
1387 err
. reason
= err
. reason
+ '. Check your system encoding configuration or use the --encoding option.'
1390 def get_encoding ( self
):
1391 encoding
= self
. params
. get ( 'encoding' )
1392 if encoding
is None :
1393 encoding
= preferredencoding ()