]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
28 compat_urllib_request
,
47 UnavailableVideoError
,
52 from .extractor
import get_info_extractor
, gen_extractors
53 from .FileDownloader
import FileDownloader
54 from .version
import __version__
57 class YoutubeDL(object):
60 YoutubeDL objects are the ones responsible of downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object handles it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL processes the extracted information, possibly using a File
73 Downloader to download the video.
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135 socket_timeout: Time to wait for unresponsive hosts, in seconds
137 The following parameters are not used by YoutubeDL itself, they are used by
139 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
140 noresizebuffer, retries, continuedl, noprogress, consoletitle
# Class-level placeholders; __init__ rebinds both to 0 on every instance.
_download_retcode = None
_num_downloads = None
def __init__(self, params=None):
    """Create a FileDownloader object with the given options.

    params is the options dictionary; None is treated as an empty
    dictionary.
    """
    # Resolve params first: every option lookup below goes through it, so
    # a None argument must not be dereferenced.
    self.params = {} if params is None else params

    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # The boolean indexes the list: False -> stdout, True -> stderr.
    self._screen_file = [sys.stdout, sys.stderr][
        bool(self.params.get('logtostderr', False))]

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    # Builds self._opener and self.cookiejar, which urlopen() and
    # __exit__() rely on.
    self._setup_opener()
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is also indexed by its ie_key() and told that this
    object is its downloader.
    """
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # Not registered yet: look the class up by key, instantiate it and
        # register the new instance so later calls reuse it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    self._pps.append(pp)
    pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    When a 'logger' is configured the message goes to logger.debug()
    instead; skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not self.params.get('quiet', False):
        # skip_eol indexes the list: False -> newline, True -> nothing.
        terminator = [u'\n', u''][skip_eol]
        output = message + terminator
        write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr.

    When a 'logger' is configured the message goes to logger.error()
    instead. message must be a unicode string.
    """
    assert type(message) == type(u'')
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        output = message + u'\n'
        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)
def to_console_title(self, message):
    """Set the console/terminal title to message if 'consoletitle' is set."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title if 'consoletitle' is set."""
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title if 'consoletitle' is set."""
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
def __exit__(self, *args):
    """Leave the context: restore the console title and persist cookies.

    The cookie jar is only saved when a 'cookiefile' option was given.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = u''
                # Some exceptions carry the exc_info of the error that
                # caused them; print that traceback first.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: remember the failure for the exit code.
    self._download_retcode = 1
def report_warning(self, message):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_writedescription(self, descfn):
    """ Report that the description file is being written """
    self.to_screen(u'[info] Writing video description to: ' + descfn)
def report_writesubtitles(self, sub_filename):
    """ Report that the subtitles file is being written """
    self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
def report_writeinfojson(self, infofn):
    """ Report that the metadata file has been written """
    self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
def report_writeannotations(self, annofn):
    """ Report that the annotations file has been written. """
    self.to_screen(u'[info] Writing video annotations to: ' + annofn)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The name cannot be shown on this output; report without it.
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Increment the ordinal that assigns a number to each file."""
    self._num_downloads += 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option with the sanitized fields of info_dict
    plus 'epoch' and 'autonumber'. Returns None, after reporting an
    error, when the template cannot be expanded.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        # Missing values become 'NA'; everything else is stringified and
        # made filesystem-safe.
        sanitize = lambda k, v: sanitize_filename(
            u'NA' if v is None else compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == u'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items())

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        return filename
    except KeyError as err:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a message explaining why the entry is skipped
    (title filters, date range, age limit or download archive).
    """

    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # The entry may have no title yet (playlist evaluation), so
            # fall back to the id the same way the archive message does.
            shown = info_dict.get('title', info_dict.get('id', u'video'))
            return u'Skipping "' + shown + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % info_dict.get('title', info_dict.get('id', u'video')))
    return None
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # Static: callers invoke it as self.add_extra_info(info, extra).
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    '''
    # NOTE: the shared default for extra_info is only read here and passed
    # on, never mutated.

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'extractor_key': ie.ie_key(),
                })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ran out without any extractor accepting the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """

    result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':

        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based; convert to a slice index.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Stamp each legacy entry with the parent's extractor fields.
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats, or None.

    format_spec may be 'best'/None (last entry), 'worst' (first entry),
    a file extension, or a format_id; for the last two the last matching
    entry wins. Returns None when nothing matches or the list is empty.
    """
    if not available_formats:
        # Nothing to choose from; indexing below would raise IndexError.
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    else:
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        else:
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    return None
def process_video_result(self, info_dict, download=True):
    """Normalize a video result, choose its format(s) and process them.

    Returns info_dict updated with the last selected format, or None
    when the 'listformats' option only lists the formats. Raises
    ExtractorError when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, format) in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = u'{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # Keep formats up to and including the limit.
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requestd in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the skip filters and forced printings, writes the requested
    side files (description, annotations, subtitles, info JSON,
    thumbnail), downloads and post-processes the video, then records it
    in the download archive. Returns None; most failures are reported
    through report_error and end processing early.
    """

    assert info_dict.get('_type', 'video') == 'video'
    #We increment the download the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed before the try so the error message can name the file.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file here; descfn belongs to the
                # description step and may not even be bound.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the dumped metadata.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    # NOTE(review): nesting level of this call was reconstructed from a
    # whitespace-stripped source — confirm it belongs at method level.
    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated download return code. Raises SameFileError
    when several URLs would be written to one fixed output name.
    """
    if (len(url_list) > 1 and
            '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            #It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor returns a (keep_video_wish, new_info) pair. The
    original file is deleted only when the combined wish is False and
    the 'keepvideo' option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
    """Return the '<extractor> <id>' archive key, or None if incomplete."""
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        if 'id' in info_dict:
            extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if the video is listed in the download archive file.

    Returns False when no 'download_archive' option is set, when the
    video information is incomplete, or when the archive file does not
    exist yet. Other I/O errors propagate.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' option is set.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a printable resolution for a format dict.

    Preference order: 'audio only' when vcodec is 'none', an explicit
    '_resolution', 'WxH', 'Hp', and finally default.
    """
    # Static: callers invoke it as self.format_resolution(format).
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('_resolution') is not None:
        return format['_resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = u'%sx%s' % (format['width'], format['height'])
        else:
            res = u'%sp' % format['height']
    else:
        res = default
    return res
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict to the screen."""
    def format_note(fdict):
        # Builds the free-text "note" column: note, video codec/bitrate,
        # audio codec/bitrate and file size, comma-separated.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One table row; the id column is idlen + 1 characters wide.
        return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len(u'format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    # The header is rendered through line() with a fake format dict.
    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    return self._opener.open(req)
def print_debug_header(self):
    """Write version, git HEAD, Python and proxy details when 'verbose'."""
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout. Clear the stored exception where the API exists.
        try:
            sys.exc_clear()
        except Exception:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener and cookie jar from the options.

    Sets self.cookiejar and self._opener, installs the opener globally
    and sets the default socket timeout (600 seconds unless
    'socket_timeout' is given).
    """
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An empty 'proxy' option yields an empty proxy map.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)