]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
, unicode_literals
30 compat_urllib_request
,
53 UnavailableVideoError
,
60 from . extractor
import get_info_extractor
, gen_extractors
61 from . downloader
import get_suitable_downloader
62 from . postprocessor
import FFmpegMergerPP
63 from . version
import __version__
66 class YoutubeDL ( object ):
69 YoutubeDL objects are the ones responsible for downloading the
70 actual video file and writing it to disk if the user has requested
71 it, among some other tasks. In most cases there should be one per
72 program. As, given a video URL, the downloader doesn't know how to
73 extract all the needed information, task that InfoExtractors do, it
74 has to pass the URL to one of them.
76 For this, YoutubeDL objects have a method that allows
77 InfoExtractors to be registered in a given order. When it is passed
78 a URL, the YoutubeDL object hands it to the first InfoExtractor it
79 finds that reports being able to handle it. The InfoExtractor extracts
80 all the information about the video or videos the URL refers to, and
81 YoutubeDL processes the extracted information, possibly using a File
82 Downloader to download the video.
84 YoutubeDL objects accept a lot of parameters. In order not to saturate
85 the object constructor with arguments, it receives a dictionary of
86 options instead. These options are available through the params
87 attribute for the InfoExtractors to use. The YoutubeDL also
88 registers itself as the downloader in charge for the InfoExtractors
89 that are added to it, so this is a "mutual registration".
93 username: Username for authentication purposes.
94 password: Password for authentication purposes.
95 videopassword: Password for accessing a video.
96 usenetrc: Use netrc for authentication instead.
97 verbose: Print additional info to stdout.
98 quiet: Do not print messages to stdout.
99 no_warnings: Do not print out anything for warnings.
100 forceurl: Force printing final URL.
101 forcetitle: Force printing title.
102 forceid: Force printing ID.
103 forcethumbnail: Force printing thumbnail URL.
104 forcedescription: Force printing description.
105 forcefilename: Force printing final filename.
106 forceduration: Force printing duration.
107 forcejson: Force printing info_dict as JSON.
108 simulate: Do not download the video files.
109 format: Video format code.
110 format_limit: Highest quality format to try.
111 outtmpl: Template for output names.
112 restrictfilenames: Do not allow "&" and spaces in file names
113 ignoreerrors: Do not stop on download errors.
114 nooverwrites: Prevent overwriting files.
115 playliststart: Playlist item to start at.
116 playlistend: Playlist item to end at.
117 matchtitle: Download only matching titles.
118 rejecttitle: Reject downloads for matching titles.
119 logger: Log messages to a logging.Logger instance.
120 logtostderr: Log messages to stderr instead of stdout.
121 writedescription: Write the video description to a .description file
122 writeinfojson: Write the video description to a .info.json file
123 writeannotations: Write the video annotations to a .annotations.xml file
124 writethumbnail: Write the thumbnail image to a file
125 writesubtitles: Write the video subtitles to a file
126 writeautomaticsub: Write the automatic subtitles to a file
127 allsubtitles: Downloads all the subtitles of the video
128 (requires writesubtitles or writeautomaticsub)
129 listsubtitles: Lists all available subtitles for the video
130 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
131 subtitleslangs: List of languages of the subtitles to download
132 keepvideo: Keep the video file after post-processing
133 daterange: A DateRange object, download only if the upload_date is in the range.
134 skip_download: Skip the actual download of the video file
135 cachedir: Location of the cache files in the filesystem.
136 None to disable filesystem cache.
137 noplaylist: Download single video instead of a playlist if in doubt.
138 age_limit: An integer representing the user's age in years.
139 Unsuitable videos for the given age are skipped.
140 min_views: An integer representing the minimum view count the video
141 must have in order to not be skipped.
142 Videos without view count information are always
143 downloaded. None for no limit.
144 max_views: An integer representing the maximum view count.
145 Videos that are more popular than that are not
147 Videos without view count information are always
148 downloaded. None for no limit.
149 download_archive: File name of a file where all downloads are recorded.
150 Videos already present in the file are not downloaded
152 cookiefile: File name where cookies should be read from and dumped to.
153 nocheckcertificate:Do not verify SSL certificates
154 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
155 At the moment, this is only supported by YouTube.
156 proxy: URL of the proxy server to use
157 socket_timeout: Time to wait for unresponsive hosts, in seconds
158 bidi_workaround: Work around buggy terminals without bidirectional text
159 support, using fribidi
160 debug_printtraffic:Print out sent and received HTTP traffic
161 include_ads: Download ads as well
162 default_search: Prepend this string if an input url is not valid.
163 'auto' for elaborate guessing
164 encoding: Use this encoding instead of the system-specified.
166 The following parameters are not used by YoutubeDL itself, they are used by
168 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
169 noresizebuffer, retries, continuedl, noprogress, consoletitle
171 The following options are used by the post processors:
172 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
173 otherwise prefer avconv.
179 _download_retcode
= None
180 _num_downloads
= None
183 def __init__ ( self
, params
= None ):
184 """Create a FileDownloader object with the given options."""
188 self
._ ies
_ instances
= {}
190 self
._ progress
_ hooks
= []
191 self
._ download
_ retcode
= 0
192 self
._ num
_ downloads
= 0
193 self
._ screen
_ file
= [ sys
. stdout
, sys
. stderr
][ params
. get ( 'logtostderr' , False )]
194 self
._ err
_ file
= sys
. stderr
197 if params
. get ( 'bidi_workaround' , False ):
200 master
, slave
= pty
. openpty ()
201 width
= get_term_width ()
205 width_args
= [ '-w' , str ( width
)]
207 stdin
= subprocess
. PIPE
,
209 stderr
= self
._ err
_ file
)
211 self
._ output
_ process
= subprocess
. Popen (
212 [ 'bidiv' ] + width_args
, ** sp_kwargs
215 self
._ output
_ process
= subprocess
. Popen (
216 [ 'fribidi' , '-c' , 'UTF-8' ] + width_args
, ** sp_kwargs
)
217 self
._ output
_ channel
= os
. fdopen ( master
, 'rb' )
218 except OSError as ose
:
220 self
. report_warning ( 'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.' )
224 if ( sys
. version_info
>= ( 3 ,) and sys
. platform
!= 'win32' and
225 sys
. getfilesystemencoding () in [ 'ascii' , 'ANSI_X3.4-1968' ]
226 and not params
[ 'restrictfilenames' ]):
227 # On Python 3, the Unicode filesystem API will throw errors (#1474)
229 'Assuming --restrict-filenames since file system encoding '
230 'cannot encode all charactes. '
231 'Set the LC_ALL environment variable to fix this.' )
232 self
. params
[ 'restrictfilenames' ] = True
234 if ' %(stitle)s ' in self
. params
. get ( 'outtmpl' , '' ):
235 self
. report_warning ( ' %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.' )
def add_info_extractor(self, ie):
    """Register *ie* as the last InfoExtractor of this downloader.

    The extractor is appended to the ordered extractor list, stored in
    the per-key instance table under ``ie.ie_key()``, and informed that
    this object is its downloader.
    """
    self._ies.append(ie)
    extractor_key = ie.ie_key()
    self._ies_instances[extractor_key] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """Return the InfoExtractor instance registered under *ie_key*.

    An already registered instance is reused. When none exists, a new
    one is created from the extractor class looked up by key, added to
    this downloader via ``add_info_extractor`` and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
def add_default_info_extractors(self):
    """Register every extractor yielded by ``gen_extractors()``, in order."""
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
def add_post_processor(self, pp):
    """Append *pp* to the post-processor chain and bind it to this downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register *ph* as a progress hook.

    Hooks are kept in registration order; per the original note they are
    currently only used by the file downloader.
    """
    hooks = self._progress_hooks
    hooks.append(ph)
def _bidi_workaround(self, message):
    """Pass *message* through the external bidi helper, if one is set up.

    Without an ``_output_channel`` attribute the message is returned
    untouched. Otherwise the message is written (UTF-8, newline
    terminated) to the helper's stdin, and as many lines as the message
    contains are read back from the output channel and returned without
    the trailing newline.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    newline = '\n'
    expected_lines = message.count(newline) + 1

    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + newline).encode('utf-8'))
    helper_stdin.flush()

    decoded = []
    for _ in range(expected_lines):
        decoded.append(self._output_channel.readline().decode('utf-8'))
    # Drop the newline that was appended only for the helper's benefit.
    return ''.join(decoded)[:-len(newline)]
def to_screen(self, message, skip_eol=False):
    """Print *message* to stdout unless quiet mode is active.

    Thin wrapper around ``to_stdout`` that always requests the quiet
    check; its return value is passed through.
    """
    honor_quiet = True
    return self.to_stdout(message, skip_eol, check_quiet=honor_quiet)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the encoding configured in ``params``.

    Delegates to ``write_string``; the ``encoding`` option is forwarded
    as-is (``None`` when it is not set).
    """
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print *message* to the screen file, or hand it to the logger.

    If a ``logger`` is configured in ``params`` the message is sent to
    ``logger.debug`` and nothing is written. Otherwise the message is
    written to ``self._screen_file`` after the bidi workaround.

    message     -- text to print
    skip_eol    -- when true, do not append a newline
    check_quiet -- when true, print nothing if the ``quiet`` option is
                   set; when false the ``quiet`` option is ignored
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    # A conditional expression (rather than indexing a list with the
    # flag) accepts any truthy/falsy value for skip_eol.
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
def to_stderr(self, message):
    """Print *message* to stderr, or send it to the logger as an error.

    With a ``logger`` configured in ``params`` the message goes to
    ``logger.error``; otherwise it is written, newline terminated, to
    ``self._err_file`` after the bidi workaround.
    """
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
def to_console_title(self, message):
    """Set the console/terminal title to *message*.

    Does nothing unless the ``consoletitle`` option is enabled. On
    Windows with an attached console the title is set through
    ``SetConsoleTitleW``; elsewhere, when ``TERM`` is set, an escape
    sequence is written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return

    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        wide_title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(wide_title)
        return

    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
def save_console_title(self):
    """Ask the terminal to push its current title onto its title stack.

    Only acts when the ``consoletitle`` option is enabled and ``TERM``
    is present in the environment.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Ask the terminal to pop the previously saved title from its stack.

    Only acts when the ``consoletitle`` option is enabled and ``TERM``
    is present in the environment.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
339 self
. save_console_title ()
def __exit__(self, *args):
    """Leave the context: restore the console title and persist cookies.

    The cookie jar is saved only when a ``cookiefile`` option is set.
    Returns None, so exceptions raised inside the ``with`` body are not
    suppressed.
    """
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode a
    traceback is printed as well: *tb* when given, otherwise one built
    from the exception being handled (including the ``exc_info`` it
    wraps, if any) or, outside an except block, from the current stack.

    Unless the ``ignoreerrors`` option is set, a DownloadError carrying
    the message and the relevant exc_info is then raised. With
    ``ignoreerrors`` the download return code is set to 1 instead.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.extend(
                        traceback.format_exception(*exc_value.exc_info))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(
                    traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_type, exc_value = sys.exc_info()[:2]
    if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        # Prefer the original error wrapped by the active exception.
        exc_info = exc_value.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
def report_warning(self, message):
    """Emit *message* as a warning.

    With a ``logger`` configured the message goes to ``logger.warning``.
    Otherwise, unless ``no_warnings`` is set, it is printed to stderr
    prefixed with 'WARNING:'; the prefix is colored when stderr is a tty
    and the platform is not Windows.
    """
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
def report_error(self, message, tb=None):
    """Report *message* through ``trouble`` with an 'ERROR:' prefix.

    The prefix is colored red when stderr is a tty and the platform is
    not Windows. *tb* is forwarded to ``trouble`` unchanged.
    """
    use_color = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
def report_file_already_downloaded(self, file_name):
    """Tell the user that *file_name* has already been fully downloaded.

    If producing or printing the detailed message raises
    UnicodeEncodeError, a generic message without the name is printed.
    """
    generic = '[download] The file has already been downloaded'
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen(generic)
def prepare_filename(self, info_dict):
    """Generate the output filename.

    A copy of *info_dict* is extended with the computed template fields
    ``epoch``, ``autonumber``, a zero-padded ``playlist_index`` and,
    when missing, ``resolution``. Every non-None value is sanitized for
    use in a file name and fields absent from the dict render as 'NA'.
    The ``outtmpl`` option (default DEFAULT_OUTTMPL) is then expanded
    with these fields.

    Returns the file name, or None after reporting an error when the
    template raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Only the width is known: it belongs on the left of the
                # 'x' (WIDTHxHEIGHT), with the unknown height as '?'.
                template_dict['resolution'] = '%dx?' % template_dict['width']

        def sanitize(k, v):
            # The 'id' field gets the more lenient id sanitization.
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))

        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Unknown template fields expand to 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = os.path.expanduser(outtmpl)
        filename = tmpl % template_dict
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry(self, info_dict):
    """Return None iff the entry should be downloaded.

    Otherwise return a human-readable string explaining why the entry is
    skipped. The checks, in order, are: ``matchtitle`` / ``rejecttitle``
    patterns (only when the entry has a title), ``daterange`` (only when
    an upload date is known), ``min_views`` / ``max_views`` (only when a
    view count is known), ``age_limit``, and the download archive.
    """
    # Display name for messages; entries that are still unresolved
    # playlist items may carry no title at all.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # Use video_title: `title` is unbound when the entry has no
            # 'title' key, which used to raise instead of skipping.
            return 'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
def add_extra_info(info_dict, extra_info):
    """Copy into *info_dict* every key of *extra_info* it does not have yet.

    Keys already present in *info_dict* keep their current value; the
    dict is modified in place.
    """
    for field in extra_info:
        if field not in info_dict:
            info_dict[field] = extra_info[field]
494 def extract_info ( self
, url
, download
= True , ie_key
= None , extra_info
={},
497 Returns a list with a dictionary for each video we find.
498 If 'download', also downloads the videos.
499 extra_info is a dict containing the extra values to add to each result
503 ies
= [ self
. get_info_extractor ( ie_key
)]
508 if not ie
. suitable ( url
):
512 self
. report_warning ( 'The program functionality for this site has been marked as broken, '
513 'and will probably not work.' )
516 ie_result
= ie
. extract ( url
)
517 if ie_result
is None : # Finished already (backwards compatibility; listformats and friends should be moved here)
519 if isinstance ( ie_result
, list ):
520 # Backwards compatibility: old IE result format
522 '_type' : 'compat_list' ,
523 'entries' : ie_result
,
525 self
. add_default_extra_info ( ie_result
, ie
, url
)
527 return self
. process_ie_result ( ie_result
, download
, extra_info
)
530 except ExtractorError
as de
: # An error we somewhat expected
531 self
. report_error ( compat_str ( de
), de
. format_traceback ())
533 except MaxDownloadsReached
:
535 except Exception as e
:
536 if self
. params
. get ( 'ignoreerrors' , False ):
537 self
. report_error ( compat_str ( e
), tb
= compat_str ( traceback
. format_exc ()))
542 self
. report_error ( 'no suitable InfoExtractor for URL %s ' % url
)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill *ie_result* with the standard provenance fields, if missing.

    The fields are the extractor name and key taken from *ie*, plus the
    page URL and its basename derived from *url*. Values already present
    in *ie_result* are left alone (see ``add_extra_info``).
    """
    provenance = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, provenance)
552 def process_ie_result ( self
, ie_result
, download
= True , extra_info
={}):
554 Take the result of the ie(may be modified) and resolve all unresolved
555 references (URLs, playlist items).
557 It will also download the videos if 'download'.
558 Returns the resolved ie_result.
561 result_type
= ie_result
. get ( '_type' , 'video' ) # If not given we suppose it's a video, support the default old system
562 if result_type
== 'video' :
563 self
. add_extra_info ( ie_result
, extra_info
)
564 return self
. process_video_result ( ie_result
, download
= download
)
565 elif result_type
== 'url' :
566 # We have to add extra_info to the results because it may be
567 # contained in a playlist
568 return self
. extract_info ( ie_result
[ 'url' ],
570 ie_key
= ie_result
. get ( 'ie_key' ),
571 extra_info
= extra_info
)
572 elif result_type
== 'url_transparent' :
573 # Use the information from the embedding page
574 info
= self
. extract_info (
575 ie_result
[ 'url' ], ie_key
= ie_result
. get ( 'ie_key' ),
576 extra_info
= extra_info
, download
= False , process
= False )
578 def make_result ( embedded_info
):
579 new_result
= ie_result
. copy ()
580 for f
in ( '_type' , 'url' , 'ext' , 'player_url' , 'formats' ,
581 'entries' , 'ie_key' , 'duration' ,
582 'subtitles' , 'annotations' , 'format' ,
583 'thumbnail' , 'thumbnails' ):
586 if f
in embedded_info
:
587 new_result
[ f
] = embedded_info
[ f
]
589 new_result
= make_result ( info
)
591 assert new_result
. get ( '_type' ) != 'url_transparent'
592 if new_result
. get ( '_type' ) == 'compat_list' :
593 new_result
[ 'entries' ] = [
594 make_result ( e
) for e
in new_result
[ 'entries' ]]
596 return self
. process_ie_result (
597 new_result
, download
= download
, extra_info
= extra_info
)
598 elif result_type
== 'playlist' :
599 # We process each entry in the playlist
600 playlist
= ie_result
. get ( 'title' , None ) or ie_result
. get ( 'id' , None )
601 self
. to_screen ( '[download] Downloading playlist: %s ' % playlist
)
603 playlist_results
= []
605 playliststart
= self
. params
. get ( 'playliststart' , 1 ) - 1
606 playlistend
= self
. params
. get ( 'playlistend' , None )
607 # For backwards compatibility, interpret -1 as whole list
608 if playlistend
== - 1 :
611 if isinstance ( ie_result
[ 'entries' ], list ):
612 n_all_entries
= len ( ie_result
[ 'entries' ])
613 entries
= ie_result
[ 'entries' ][ playliststart
: playlistend
]
614 n_entries
= len ( entries
)
616 "[ %s ] playlist %s : Collected %d video ids (downloading %d of them)" %
617 ( ie_result
[ 'extractor' ], playlist
, n_all_entries
, n_entries
))
619 assert isinstance ( ie_result
[ 'entries' ], PagedList
)
620 entries
= ie_result
[ 'entries' ]. getslice (
621 playliststart
, playlistend
)
622 n_entries
= len ( entries
)
624 "[ %s ] playlist %s : Downloading %d videos" %
625 ( ie_result
[ 'extractor' ], playlist
, n_entries
))
627 for i
, entry
in enumerate ( entries
, 1 ):
628 self
. to_screen ( '[download] Downloading video # %s of %s ' % ( i
, n_entries
))
630 'playlist' : playlist
,
631 'playlist_index' : i
+ playliststart
,
632 'extractor' : ie_result
[ 'extractor' ],
633 'webpage_url' : ie_result
[ 'webpage_url' ],
634 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
635 'extractor_key' : ie_result
[ 'extractor_key' ],
638 reason
= self
._ match
_ entry
( entry
)
639 if reason
is not None :
640 self
. to_screen ( '[download] ' + reason
)
643 entry_result
= self
. process_ie_result ( entry
,
646 playlist_results
. append ( entry_result
)
647 ie_result
[ 'entries' ] = playlist_results
649 elif result_type
== 'compat_list' :
651 self
. add_extra_info ( r
,
653 'extractor' : ie_result
[ 'extractor' ],
654 'webpage_url' : ie_result
[ 'webpage_url' ],
655 'webpage_url_basename' : url_basename ( ie_result
[ 'webpage_url' ]),
656 'extractor_key' : ie_result
[ 'extractor_key' ],
659 ie_result
[ 'entries' ] = [
660 self
. process_ie_result ( _fixup ( r
), download
, extra_info
)
661 for r
in ie_result
[ 'entries' ]
665 raise Exception ( 'Invalid result type: %s ' % result_type
)
def select_format(self, format_spec, available_formats):
    """Pick the format described by *format_spec* from *available_formats*.

    *available_formats* is treated as ordered from worst to best.
    Supported specs:
      None / 'best'             -- last format
      'worst'                   -- first format
      'bestaudio'/'worstaudio'  -- last/first format whose vcodec is 'none'
      'bestvideo'/'worstvideo'  -- last/first format whose acodec is 'none'
      'mp4', 'flv', 'webm', '3gp' -- last format with that extension
      anything else             -- last format with that format_id

    Returns the chosen format dict, or None when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # spec -> (codec field that must be 'none', index into the candidates)
    codec_specs = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_specs:
        codec_field, position = codec_specs[format_spec]
        candidates = [
            fmt for fmt in available_formats
            if fmt.get(codec_field) == 'none']
        return candidates[position] if candidates else None

    known_extensions = ['mp4', 'flv', 'webm', '3gp']
    lookup_field = 'ext' if format_spec in known_extensions else 'format_id'
    candidates = [
        fmt for fmt in available_formats
        if fmt[lookup_field] == format_spec]
    return candidates[-1] if candidates else None
707 def process_video_result ( self
, info_dict
, download
= True ):
708 assert info_dict
. get ( '_type' , 'video' ) == 'video'
710 if 'id' not in info_dict
:
711 raise ExtractorError ( 'Missing "id" field in extractor result' )
712 if 'title' not in info_dict
:
713 raise ExtractorError ( 'Missing "title" field in extractor result' )
715 if 'playlist' not in info_dict
:
716 # It isn't part of a playlist
717 info_dict
[ 'playlist' ] = None
718 info_dict
[ 'playlist_index' ] = None
720 thumbnails
= info_dict
. get ( 'thumbnails' )
722 thumbnails
. sort ( key
= lambda t
: (
723 t
. get ( 'width' ), t
. get ( 'height' ), t
. get ( 'url' )))
725 if 'width' in t
and 'height' in t
:
726 t
[ 'resolution' ] = ' %dx%d ' % ( t
[ 'width' ], t
[ 'height' ])
728 if thumbnails
and 'thumbnail' not in info_dict
:
729 info_dict
[ 'thumbnail' ] = thumbnails
[- 1 ][ 'url' ]
731 if 'display_id' not in info_dict
and 'id' in info_dict
:
732 info_dict
[ 'display_id' ] = info_dict
[ 'id' ]
734 if info_dict
. get ( 'upload_date' ) is None and info_dict
. get ( 'timestamp' ) is not None :
735 upload_date
= datetime
. datetime
. utcfromtimestamp (
736 info_dict
[ 'timestamp' ])
737 info_dict
[ 'upload_date' ] = upload_date
. strftime ( '%Y%m %d ' )
739 # This extractors handle format selection themselves
740 if info_dict
[ 'extractor' ] in [ 'Youku' ]:
742 self
. process_info ( info_dict
)
745 # We now pick which formats have to be downloaded
746 if info_dict
. get ( 'formats' ) is None :
747 # There's only one format available
748 formats
= [ info_dict
]
750 formats
= info_dict
[ 'formats' ]
753 raise ExtractorError ( 'No video formats found!' )
755 # We check that all the formats have the format and format_id fields
756 for i
, format
in enumerate ( formats
):
757 if 'url' not in format
:
758 raise ExtractorError ( 'Missing "url" key in result (index %d )' % i
)
760 if format
. get ( 'format_id' ) is None :
761 format
[ 'format_id' ] = compat_str ( i
)
762 if format
. get ( 'format' ) is None :
763 format
[ 'format' ] = ' {id} - {res}{note} ' . format (
764 id = format
[ 'format_id' ],
765 res
= self
. format_resolution ( format
),
766 note
= ' ( {0} )' . format ( format
[ 'format_note' ]) if format
. get ( 'format_note' ) is not None else '' ,
768 # Automatically determine file extension if missing
769 if 'ext' not in format
:
770 format
[ 'ext' ] = determine_ext ( format
[ 'url' ]). lower ()
772 format_limit
= self
. params
. get ( 'format_limit' , None )
774 formats
= list ( takewhile_inclusive (
775 lambda f
: f
[ 'format_id' ] != format_limit
, formats
778 # TODO Central sorting goes here
780 if formats
[ 0 ] is not info_dict
:
781 # only set the 'formats' fields if the original info_dict list them
782 # otherwise we end up with a circular reference, the first (and unique)
783 # element in the 'formats' field in info_dict is info_dict itself,
784 # wich can't be exported to json
785 info_dict
[ 'formats' ] = formats
786 if self
. params
. get ( 'listformats' , None ):
787 self
. list_formats ( info_dict
)
790 req_format
= self
. params
. get ( 'format' )
791 if req_format
is None :
793 formats_to_download
= []
794 # The -1 is for supporting YoutubeIE
795 if req_format
in ( '-1' , 'all' ):
796 formats_to_download
= formats
798 # We can accept formats requested in the format: 34/5/best, we pick
799 # the first that is available, starting from left
800 req_formats
= req_format
. split ( '/' )
801 for rf
in req_formats
:
802 if re
. match ( r
'.+?\+.+?' , rf
) is not None :
803 # Two formats have been requested like '137+139'
804 format_1
, format_2
= rf
. split ( '+' )
805 formats_info
= ( self
. select_format ( format_1
, formats
),
806 self
. select_format ( format_2
, formats
))
807 if all ( formats_info
):
809 'requested_formats' : formats_info
,
811 'ext' : formats_info
[ 0 ][ 'ext' ],
814 selected_format
= None
816 selected_format
= self
. select_format ( rf
, formats
)
817 if selected_format
is not None :
818 formats_to_download
= [ selected_format
]
820 if not formats_to_download
:
821 raise ExtractorError ( 'requested format not available' ,
825 if len ( formats_to_download
) > 1 :
826 self
. to_screen ( '[info] %s : downloading video in %s formats' % ( info_dict
[ 'id' ], len ( formats_to_download
)))
827 for format
in formats_to_download
:
828 new_info
= dict ( info_dict
)
829 new_info
. update ( format
)
830 self
. process_info ( new_info
)
831 # We update the info dict with the best quality format (backwards compatibility)
832 info_dict
. update ( formats_to_download
[- 1 ])
835 def process_info ( self
, info_dict
):
836 """Process a single resolved IE result."""
838 assert info_dict
. get ( '_type' , 'video' ) == 'video'
840 max_downloads
= self
. params
. get ( 'max_downloads' )
841 if max_downloads
is not None :
842 if self
._ num
_ downloads
>= int ( max_downloads
):
843 raise MaxDownloadsReached ()
845 info_dict
[ 'fulltitle' ] = info_dict
[ 'title' ]
846 if len ( info_dict
[ 'title' ]) > 200 :
847 info_dict
[ 'title' ] = info_dict
[ 'title' ][: 197 ] + '...'
849 # Keep for backwards compatibility
850 info_dict
[ 'stitle' ] = info_dict
[ 'title' ]
852 if 'format' not in info_dict
:
853 info_dict
[ 'format' ] = info_dict
[ 'ext' ]
855 reason
= self
._ match
_ entry
( info_dict
)
856 if reason
is not None :
857 self
. to_screen ( '[download] ' + reason
)
860 self
._ num
_ downloads
+= 1
862 filename
= self
. prepare_filename ( info_dict
)
865 if self
. params
. get ( 'forcetitle' , False ):
866 self
. to_stdout ( info_dict
[ 'fulltitle' ])
867 if self
. params
. get ( 'forceid' , False ):
868 self
. to_stdout ( info_dict
[ 'id' ])
869 if self
. params
. get ( 'forceurl' , False ):
870 # For RTMP URLs, also include the playpath
871 self
. to_stdout ( info_dict
[ 'url' ] + info_dict
. get ( 'play_path' , '' ))
872 if self
. params
. get ( 'forcethumbnail' , False ) and info_dict
. get ( 'thumbnail' ) is not None :
873 self
. to_stdout ( info_dict
[ 'thumbnail' ])
874 if self
. params
. get ( 'forcedescription' , False ) and info_dict
. get ( 'description' ) is not None :
875 self
. to_stdout ( info_dict
[ 'description' ])
876 if self
. params
. get ( 'forcefilename' , False ) and filename
is not None :
877 self
. to_stdout ( filename
)
878 if self
. params
. get ( 'forceduration' , False ) and info_dict
. get ( 'duration' ) is not None :
879 self
. to_stdout ( formatSeconds ( info_dict
[ 'duration' ]))
880 if self
. params
. get ( 'forceformat' , False ):
881 self
. to_stdout ( info_dict
[ 'format' ])
882 if self
. params
. get ( 'forcejson' , False ):
883 info_dict
[ '_filename' ] = filename
884 self
. to_stdout ( json
. dumps ( info_dict
))
886 # Do nothing else if in simulate mode
887 if self
. params
. get ( 'simulate' , False ):
894 dn
= os
. path
. dirname ( encodeFilename ( filename
))
895 if dn
and not os
. path
. exists ( dn
):
897 except ( OSError , IOError ) as err
:
898 self
. report_error ( 'unable to create directory ' + compat_str ( err
))
901 if self
. params
. get ( 'writedescription' , False ):
902 descfn
= filename
+ '.description'
903 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( descfn
)):
904 self
. to_screen ( '[info] Video description is already present' )
907 self
. to_screen ( '[info] Writing video description to: ' + descfn
)
908 with io
. open ( encodeFilename ( descfn
), 'w' , encoding
= 'utf-8' ) as descfile
:
909 descfile
. write ( info_dict
[ 'description' ])
910 except ( KeyError , TypeError ):
911 self
. report_warning ( 'There \' s no description to write.' )
912 except ( OSError , IOError ):
913 self
. report_error ( 'Cannot write description file ' + descfn
)
916 if self
. params
. get ( 'writeannotations' , False ):
917 annofn
= filename
+ '.annotations.xml'
918 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( annofn
)):
919 self
. to_screen ( '[info] Video annotations are already present' )
922 self
. to_screen ( '[info] Writing video annotations to: ' + annofn
)
923 with io
. open ( encodeFilename ( annofn
), 'w' , encoding
= 'utf-8' ) as annofile
:
924 annofile
. write ( info_dict
[ 'annotations' ])
925 except ( KeyError , TypeError ):
926 self
. report_warning ( 'There are no annotations to write.' )
927 except ( OSError , IOError ):
928 self
. report_error ( 'Cannot write annotations file: ' + annofn
)
931 subtitles_are_requested
= any ([ self
. params
. get ( 'writesubtitles' , False ),
932 self
. params
. get ( 'writeautomaticsub' )])
934 if subtitles_are_requested
and 'subtitles' in info_dict
and info_dict
[ 'subtitles' ]:
935 # subtitles download errors are already managed as troubles in relevant IE
936 # that way it will silently go on when used with unsupporting IE
937 subtitles
= info_dict
[ 'subtitles' ]
938 sub_format
= self
. params
. get ( 'subtitlesformat' , 'srt' )
939 for sub_lang
in subtitles
. keys ():
940 sub
= subtitles
[ sub_lang
]
944 sub_filename
= subtitles_filename ( filename
, sub_lang
, sub_format
)
945 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( sub_filename
)):
946 self
. to_screen ( '[info] Video subtitle %s . %s is already_present' % ( sub_lang
, sub_format
))
948 self
. to_screen ( '[info] Writing video subtitles to: ' + sub_filename
)
949 with io
. open ( encodeFilename ( sub_filename
), 'w' , encoding
= 'utf-8' ) as subfile
:
951 except ( OSError , IOError ):
952 self
. report_error ( 'Cannot write subtitles file ' + sub_filename
)
955 if self
. params
. get ( 'writeinfojson' , False ):
956 infofn
= os
. path
. splitext ( filename
)[ 0 ] + '.info.json'
957 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( infofn
)):
958 self
. to_screen ( '[info] Video description metadata is already present' )
960 self
. to_screen ( '[info] Writing video description metadata as JSON to: ' + infofn
)
962 write_json_file ( info_dict
, encodeFilename ( infofn
))
963 except ( OSError , IOError ):
964 self
. report_error ( 'Cannot write metadata to JSON file ' + infofn
)
967 if self
. params
. get ( 'writethumbnail' , False ):
968 if info_dict
. get ( 'thumbnail' ) is not None :
969 thumb_format
= determine_ext ( info_dict
[ 'thumbnail' ], 'jpg' )
970 thumb_filename
= os
. path
. splitext ( filename
)[ 0 ] + '.' + thumb_format
971 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( thumb_filename
)):
972 self
. to_screen ( '[ %s ] %s : Thumbnail is already present' %
973 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
975 self
. to_screen ( '[ %s ] %s : Downloading thumbnail ...' %
976 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ]))
978 uf
= self
. urlopen ( info_dict
[ 'thumbnail' ])
979 with open ( thumb_filename
, 'wb' ) as thumbf
:
980 shutil
. copyfileobj ( uf
, thumbf
)
981 self
. to_screen ( '[ %s ] %s : Writing thumbnail to: %s ' %
982 ( info_dict
[ 'extractor' ], info_dict
[ 'id' ], thumb_filename
))
983 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
984 self
. report_warning ( 'Unable to download thumbnail " %s ": %s ' %
985 ( info_dict
[ 'thumbnail' ], compat_str ( err
)))
987 if not self
. params
. get ( 'skip_download' , False ):
988 if self
. params
. get ( 'nooverwrites' , False ) and os
. path
. exists ( encodeFilename ( filename
)):
993 fd
= get_suitable_downloader ( info
)( self
, self
. params
)
994 for ph
in self
._ progress
_ hooks
:
995 fd
. add_progress_hook ( ph
)
996 if self
. params
. get ( 'verbose' ):
997 self
. to_stdout ( '[debug] Invoking downloader on %r ' % info
. get ( 'url' ))
998 return fd
. download ( name
, info
)
999 if info_dict
. get ( 'requested_formats' ) is not None :
1002 merger
= FFmpegMergerPP ( self
, not self
. params
. get ( 'keepvideo' ))
1003 if not merger
._ get
_ executable
():
1005 self
. report_warning ( 'You have requested multiple '
1006 'formats but ffmpeg or avconv are not installed.'
1007 ' The formats won \' t be merged' )
1009 postprocessors
= [ merger
]
1010 for f
in info_dict
[ 'requested_formats' ]:
1011 new_info
= dict ( info_dict
)
1013 fname
= self
. prepare_filename ( new_info
)
1014 fname
= prepend_extension ( fname
, 'f %s ' % f
[ 'format_id' ])
1015 downloaded
. append ( fname
)
1016 partial_success
= dl ( fname
, new_info
)
1017 success
= success
and partial_success
1018 info_dict
[ '__postprocessors' ] = postprocessors
1019 info_dict
[ '__files_to_merge' ] = downloaded
1021 # Just a single file
1022 success
= dl ( filename
, info_dict
)
1023 except ( compat_urllib_error
. URLError
, compat_http_client
. HTTPException
, socket
. error
) as err
:
1024 self
. report_error ( 'unable to download video data: %s ' % str ( err
))
1026 except ( OSError , IOError ) as err
:
1027 raise UnavailableVideoError ( err
)
1028 except ( ContentTooShortError
, ) as err
:
1029 self
. report_error ( 'content too short (expected %s bytes and served %s )' % ( err
. expected
, err
. downloaded
))
1034 self
. post_process ( filename
, info_dict
)
1035 except ( PostProcessingError
) as err
:
1036 self
. report_error ( 'postprocessing: %s ' % str ( err
))
1039 self
. record_download_archive ( info_dict
)
def download(self, url_list):
    """Download a given list of URLs.

    Each URL is handed to ``self.extract_info`` (which also performs the
    download).  Returns ``self._download_retcode``.

    Raises SameFileError when several URLs would be written to one fixed
    output name, and re-raises MaxDownloadsReached after reporting it.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # NOTE(review): the middle operand of this condition is absent from the
    # extracted listing; "'%' not in outtmpl" (a template without any
    # substitution field) is restored here - confirm against the original.
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # NOTE(review): re-raise restored from a line missing in the
            # listing - confirm against the original.
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using a previously written info JSON file.

    The file is read as UTF-8 JSON and passed to ``process_ie_result`` with
    ``download=True``.  If that raises DownloadError and the info contains a
    ``webpage_url``, the download is retried from that URL; otherwise the
    error propagates.  Returns ``self._download_retcode`` (or the result of
    ``self.download`` on the fallback path).
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # NOTE(review): this assignment is absent from the extracted
        # listing; ``info`` is used below, so it is restored as a JSON load.
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            # NOTE(review): branch restored from missing lines - confirm.
            raise
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ``ie_info['__postprocessors']`` run first,
    followed by the ones in ``self._pps``.  Each receives a copy of
    ``ie_info`` extended with ``'filepath'``.  When the combined decision is
    an explicit ``False`` and the ``keepvideo`` option is not set, the
    original file is removed.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    # NOTE(review): the two initialisers and the ``try:`` / inner ``if``
    # lines below are absent from the extracted listing and were restored
    # from the surrounding logic - confirm against the original.
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            # The second return value is not used by this method.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # Any postprocessor asking to keep the file wins.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1102 def _make_archive_id ( self
, info_dict
):
1103 # Future-proof against any change in case
1104 # and backwards compatibility with prior versions
1105 extractor
= info_dict
. get ( 'extractor_key' )
1106 if extractor
is None :
1107 if 'id' in info_dict
:
1108 extractor
= info_dict
. get ( 'ie_key' ) # key in a playlist
1109 if extractor
is None :
1110 return None # Incomplete video information
1111 return extractor
. lower () + ' ' + info_dict
[ 'id' ]
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is listed in the archive file.

    Returns False when no ``download_archive`` option is set, when no
    archive id can be built, or when the archive file does not exist.
    Other IOErrors propagate.
    """
    fn = self.params.get('download_archive')
    # NOTE(review): the guards, ``try:``, ``return True``, ``raise`` and the
    # final ``return False`` are absent from the extracted listing and were
    # restored from the surrounding logic - confirm against the original.
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive file.

    Does nothing when the ``download_archive`` option is not set.
    """
    fn = self.params.get('download_archive')
    # NOTE(review): this guard and the assertion below are absent from the
    # extracted listing and were restored - confirm against the original.
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
1142 def format_resolution ( format
, default
= 'unknown' ):
1143 if format
. get ( 'vcodec' ) == 'none' :
1145 if format
. get ( 'resolution' ) is not None :
1146 return format
[ 'resolution' ]
1147 if format
. get ( 'height' ) is not None :
1148 if format
. get ( 'width' ) is not None :
1149 res
= ' %sx%s ' % ( format
[ 'width' ], format
[ 'height' ])
1151 res
= ' %s p' % format
[ 'height' ]
1152 elif format
. get ( 'width' ) is not None :
1153 res
= '?x %d ' % format
[ 'width' ]
1158 def _format_note ( self
, fdict
):
1160 if fdict
. get ( 'ext' ) in [ 'f4f' , 'f4m' ]:
1161 res
+= '(unsupported) '
1162 if fdict
. get ( 'format_note' ) is not None :
1163 res
+= fdict
[ 'format_note' ] + ' '
1164 if fdict
. get ( 'tbr' ) is not None :
1165 res
+= '%4dk ' % fdict
[ 'tbr' ]
1166 if fdict
. get ( 'container' ) is not None :
1169 res
+= ' %s container' % fdict
[ 'container' ]
1170 if ( fdict
. get ( 'vcodec' ) is not None and
1171 fdict
. get ( 'vcodec' ) != 'none' ):
1174 res
+= fdict
[ 'vcodec' ]
1175 if fdict
. get ( 'vbr' ) is not None :
1177 elif fdict
. get ( 'vbr' ) is not None and fdict
. get ( 'abr' ) is not None :
1179 if fdict
. get ( 'vbr' ) is not None :
1180 res
+= '%4dk' % fdict
[ 'vbr' ]
1181 if fdict
. get ( 'acodec' ) is not None :
1184 if fdict
[ 'acodec' ] == 'none' :
1187 res
+= ' %- 5s' % fdict
[ 'acodec' ]
1188 elif fdict
. get ( 'abr' ) is not None :
1192 if fdict
. get ( 'abr' ) is not None :
1193 res
+= '@%3dk' % fdict
[ 'abr' ]
1194 if fdict
. get ( 'asr' ) is not None :
1195 res
+= ' (%5dHz)' % fdict
[ 'asr' ]
1196 if fdict
. get ( 'filesize' ) is not None :
1199 res
+= format_bytes ( fdict
[ 'filesize' ])
1200 elif fdict
. get ( 'filesize_approx' ) is not None :
1203 res
+= '~' + format_bytes ( fdict
[ 'filesize_approx' ])
def list_formats(self, info_dict):
    """Print a table of the available formats for info_dict to the screen.

    Uses ``info_dict['formats']`` when present, otherwise treats info_dict
    itself as the only format.  With more than one format, the first row is
    tagged "(worst)" and the last "(best)".
    """
    def line(format, idlen=20):
        # One table row: id column padded to idlen + 1, then fixed-width
        # extension and resolution columns, then the free-text note.
        # NOTE(review): the ``format['ext']`` argument is absent from the
        # extracted listing; it is restored because the template has four
        # fields and the header dict below supplies an 'ext' entry.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Start an HTTP download.

    Opens req through ``self._opener`` using ``self._socket_timeout`` as the
    timeout and returns whatever the opener returns.
    """
    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write diagnostic information when the ``verbose`` option is set.

    Emits the active encodings, the youtube-dl version, the git HEAD of the
    source checkout (best effort), the Python version and platform, and the
    proxy map gathered from the opener's handlers.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    # NOTE(review): the ``try``/``except`` lines around the git lookup are
    # absent from the extracted listing.  They are restored as a best-effort
    # guard (git may be missing, or this may not be a checkout), catching
    # Exception rather than everything so that Ctrl-C still propagates -
    # confirm against the original.
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear only exists on Python 2.
            pass
    self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the URL opener used for all requests and store it on self.

    Sets ``self._socket_timeout`` (600 unless ``socket_timeout`` is given),
    ``self.cookiejar`` (loaded from ``cookiefile`` when readable) and
    ``self._opener`` (HTTPS, proxy, cookie and YoutubeDL handlers).
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    # NOTE(review): the ``else:`` lines, the ``proxies = {}`` assignment and
    # the continuation arguments (``opts_cookiefile)`` / ``self.cookiejar)``)
    # are absent from the extracted listing and were restored from the
    # surrounding logic - confirm against the original.
    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicitly empty proxy option disables proxying.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode s with the encoding returned by ``self.get_encoding()``.

    Byte strings are returned unchanged.  On failure the UnicodeEncodeError
    is re-raised with a hint about the --encoding option appended to its
    reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        # NOTE(review): the re-raise is absent from the extracted listing and
        # was restored - confirm against the original.
        raise
1325 def get_encoding ( self
):
1326 encoding
= self
. params
. get ( 'encoding' )
1327 if encoding
is None :
1328 encoding
= preferredencoding ()