]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
debian/control: Update list of extractors.
[youtubedl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import json
9 import os
10 import platform
11 import re
12 import shutil
13 import subprocess
14 import socket
15 import sys
16 import time
17 import traceback
18
19 if os.name == 'nt':
20 import ctypes
21
22 from .utils import (
23 compat_cookiejar,
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 locked_file,
38 make_HTTPS_handler,
39 MaxDownloadsReached,
40 PostProcessingError,
41 platform_name,
42 preferredencoding,
43 SameFileError,
44 sanitize_filename,
45 subtitles_filename,
46 takewhile_inclusive,
47 UnavailableVideoError,
48 write_json_file,
49 write_string,
50 YoutubeDLHandler,
51 )
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
55
56
57 class YoutubeDL(object):
58 """YoutubeDL class.
59
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
66
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object hands it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL processes the extracted information, possibly using a File
73 Downloader to download the video.
74
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
81
82 Available options:
83
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
131 again.
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135 socket_timeout: Time to wait for unresponsive hosts, in seconds
136
137 The following parameters are not used by YoutubeDL itself, they are used by
138 the FileDownloader:
139 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
140 noresizebuffer, retries, continuedl, noprogress, consoletitle
141 """
142
# Class-level placeholders; __init__ rebinds each of these names on the
# instance (params, _ies, _pps, _download_retcode, _num_downloads,
# _screen_file), so these values only act as defaults.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None
149
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Normalise params before anything reads it: the caller may pass None,
    # and report_warning() below goes through self.params as well.
    self.params = {} if params is None else params
    self._screen_file = [sys.stdout, sys.stderr][self.params.get('logtostderr', False)]

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
177
def add_info_extractor(self, ie):
    """Append ie to the extractor list, index it by its key and attach this downloader to it."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
183
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    When no instance is registered yet, the class is looked up through the
    module-level get_info_extractor, instantiated, registered with
    add_info_extractor and then returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
195
def add_default_info_extractors(self):
    """
    Register every extractor produced by gen_extractors, in the order given.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
202
def add_post_processor(self, pp):
    """Append pp to the post-processing chain and attach this downloader to it."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
207
def to_screen(self, message, skip_eol=False):
    """Emit message on the screen file, or on the logger's debug channel if one is set.

    Nothing is written in quiet mode. skip_eol suppresses the trailing
    newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if self.params.get('quiet', False):
        return
    terminator = (u'\n', u'')[skip_eol]
    write_string(message + terminator, self._screen_file)
216
def to_stderr(self, message):
    """Print message to stderr, or to the logger's error channel if one is set.

    message must be a unicode string. A trailing newline is appended when
    writing to sys.stderr.
    """
    assert isinstance(message, type(u''))
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return

    output = message + u'\n'
    # Decide whether to encode from the stream that is actually written to
    # (sys.stderr), not from the screen file, which may be a different
    # stream. Python 2 lies about the mode of sys.stdout/sys.stderr.
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
227
def to_console_title(self, message):
    """Set the console/terminal title to message when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        title_sequence = u'\033]0;%s\007' % message
        write_string(title_sequence, self._screen_file)
237
def save_console_title(self):
    """Push the current terminal title on the title stack when 'consoletitle' is enabled."""
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
244
def restore_console_title(self):
    """Pop the terminal title from the title stack when 'consoletitle' is enabled."""
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
251
def __enter__(self):
    """Enter the context: save the console title and hand back this object."""
    entered = self
    entered.save_console_title()
    return entered
255
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies if a cookie file is set."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
261
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    The message (if any) is printed to stderr. In verbose mode a traceback
    is printed too: tb when given, otherwise one built from the exception
    being handled, or from the current stack when there is none.

    Unless 'ignoreerrors' is set a DownloadError is raised; otherwise the
    return code is set to 1 and the method returns normally.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                active = sys.exc_info()[1]
                pieces = []
                if hasattr(active, 'exc_info') and active.exc_info[0]:
                    pieces.append(u''.join(traceback.format_exception(*active.exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                stack_lines = traceback.format_list(traceback.extract_stack())
                tb = u''.join(stack_lines)
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        current = sys.exc_info()
        # Prefer the exc_info carried by the handled exception, if it has one
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            exc_info = current[1].exc_info
        else:
            exc_info = current
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
291
def report_warning(self, message):
    '''
    Send message to stderr with a 'WARNING:' prefix.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (_msg_header, message))
303
def report_error(self, message, tb=None):
    '''
    Forward message to trouble() with an 'ERROR:' prefix.
    The prefix is colored red when stderr is a tty and the OS is not Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (_msg_header, message), tb)
315
def report_writedescription(self, descfn):
    """ Announce on screen that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
319
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
323
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written """
    # The message previously lacked its verb; it now matches the wording of
    # the other report_write* notices.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
327
def report_writeannotations(self, annofn):
    """ Announce on screen that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
331
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded."""
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen(u'[download] The file has already been downloaded')
338
def increment_downloads(self):
    """Advance the per-file ordinal counter by one."""
    self._num_downloads = self._num_downloads + 1
342
def prepare_filename(self, info_dict):
    """Build the output filename from the 'outtmpl' template and info_dict.

    Returns the filename, or None (after reporting an error) when the
    template cannot be expanded.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        number_format = u'%0' + str(digits) + u'd'
        fields['autonumber'] = number_format % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        def _clean(key, value):
            # Missing values are rendered as the literal string 'NA'
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(
                text,
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == u'id'))

        fields = dict((key, _clean(key, value))
                      for key, value in fields.items())

        template = os.path.expanduser(self.params['outtmpl'])
        return template % fields
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
373
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded.

    Otherwise returns a string with the reason the entry is skipped. The
    reason carries no '[download] ' prefix because the callers add it.
    """

    if 'title' in info_dict:
        # The title can be missing when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # No title may be available here, so fall back to the id the
            # same way the archive message below does.
            shown_name = info_dict.get('title', info_dict.get('id', u'video'))
            return u'Skipping "' + shown_name + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % info_dict.get('title', info_dict.get('id', u'video')))
    return None
401
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict lacks'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
407
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Run the first suitable InfoExtractor on url and process its result.

    Returns whatever process_ie_result returns for the extracted result, or
    None when nothing was extracted or an error was reported.
    If 'download', also downloads the videos.
    ie_key, when given, selects that extractor instead of trying all
    registered ones.
    extra_info is a dict containing the extra values to add to each result;
    None means no extra values.
    '''
    if extra_info is None:
        # A fresh dict per call instead of a shared mutable default argument
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'extractor_key': ie.ie_key(),
                })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
456
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info holds values added to resulting video dicts that lack them;
    None means no extra values.
    Returns the resolved ie_result.
    Raises Exception when the '_type' of ie_result is not recognised.
    """
    if extra_info is None:
        # A fresh dict per call instead of a shared mutable default argument
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':

        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based, slice indices are 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style list entries inherit the extractor data of the list
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
536
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats according to format_spec.

    'best' (or None) selects the last entry and 'worst' the first. One of
    the known extensions (mp4, flv, webm, 3gp) selects the last entry with
    that 'ext'; any other value is compared against 'format_id'.
    Returns None when nothing matches, including when available_formats
    is empty.
    """
    if not available_formats:
        # Nothing to choose from; report "no match" instead of IndexError
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
552
def process_video_result(self, info_dict, download=True):
    """Choose the format(s) of a single video result and optionally download them.

    Fills in missing 'format_id', 'format' and 'ext' fields of every format,
    applies the 'format_limit', 'prefer_free_formats' and 'format' options,
    and calls process_info for each selected format when 'download'.
    Returns info_dict updated with the last selected format, or None when
    only the format list was requested ('listformats').
    Raises ExtractorError when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is not None:
        formats = info_dict['formats']
    else:
        # There's only one format available
        formats = [info_dict]

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(index)
        if fmt.get('format') is None:
            resolution = self.format_resolution(fmt)
            if fmt.get('format_note') is not None:
                note = u' ({0})'.format(fmt['format_note'])
            else:
                note = ''
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=resolution,
                note=note,
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    if self.params.get('prefer_free_formats'):
        free_order = [u'flv', u'mp4', u'webm']

        def _free_formats_key(f):
            try:
                ext_ord = free_order.index(f['ext'])
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'

    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for candidate in req_format.split('/'):
            chosen = self.select_format(candidate, formats)
            if chosen is not None:
                formats_to_download = [chosen]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            merged = dict(info_dict)
            merged.update(fmt)
            self.process_info(merged)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
637
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the title/date/age/archive filters, performs the forced
    printings, writes the requested side files (description, annotations,
    subtitles, info JSON, thumbnail), downloads the video unless
    'skip_download' is set, runs the post-processors and records the video
    in the download archive.

    Returns None. Raises MaxDownloadsReached once more than 'max_downloads'
    files have been counted, and UnavailableVideoError when the download
    fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try block so the error message below can
            # always name the subtitles file that failed.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the dumped dict
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # Same filename encoding as every other file written here
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Release the HTTP response even if the copy fails
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file is treated as a successful download
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
795
def download(self, url_list):
    """Extract and download every URL of url_list; return the accumulated return code.

    Raises SameFileError when several URLs would be written to one fixed
    output name (a template without '%', unless 'max_downloads' is 1).
    MaxDownloadsReached is announced on screen and re-raised.
    """
    several_urls = len(url_list) > 1
    if (several_urls
            and '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
814
def post_process(self, filename, ie_info):
    """Feed the downloaded file through every registered postprocessor.

    Each postprocessor receives a copy of ie_info extended with 'filepath'.
    The original file is deleted when the postprocessors' wishes amount to
    "do not keep" and the 'keepvideo' option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    keep_video = None
    for pp in self._pps:
        try:
            wish, _new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            continue
        # A wish to keep always wins; a wish to delete only counts while
        # there is no clear decision yet.
        if wish or keep_video is None:
            keep_video = wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
837
def _make_archive_id(self, info_dict):
    """Return the '<extractor> <id>' archive line for info_dict, or None if it cannot be built."""
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None and 'id' in info_dict:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + info_dict['id']
848
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is listed in the download archive.

    Returns False when no archive is configured, when no archive id can be
    built, or when the archive file does not exist.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == vid_id for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
867
def record_download_archive(self, info_dict):
    """Append the archive identifier of info_dict to the download archive.

    Does nothing when no 'download_archive' parameter is configured.
    Otherwise the identifier built by self._make_archive_id (asserted to
    be non-empty) is written, followed by a newline, to the archive file
    opened in append mode through locked_file.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + u'\n')
876
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short textual resolution for a format dict.

    In order of precedence:
    - 'audio only' when the 'vcodec' entry is the string 'none';
    - the '_resolution' entry when it is set;
    - '<width>x<height>' when both dimensions are set;
    - '<height>p' when only the height is set;
    - default otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('_resolution')
    if explicit is not None:
        return explicit

    height = format.get('height')
    if height is None:
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
891
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are read from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format.  Each row
    shows the format id, the extension, the resolution (as computed by
    self.format_resolution) and a free-form note.  When more than one
    format is listed, the first row is tagged '(worst)' and the last one
    '(best)'.  The finished table is emitted through self.to_screen.

    An empty 'formats' list prints only the header rather than raising
    ValueError.
    """
    def format_note(fdict):
        """Build the note column: format note, codecs, bitrates, size."""
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        """Render one row; the id column is idlen + 1 characters wide."""
        # '*' takes the column width from the argument tuple.
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # Seed the list with the header width so max() never sees an empty
    # sequence, even when there are no formats at all.
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
941
def urlopen(self, req):
    """Open req through this object's opener and return the result."""
    opener = self._opener
    return opener.open(req)
945
def print_debug_header(self):
    """Write version and environment details when 'verbose' is set.

    Does nothing unless the 'verbose' parameter is truthy.  Otherwise it
    writes, through write_string:
    - the youtube-dl version;
    - the short git HEAD hash, if `git rev-parse --short HEAD` run in the
      directory containing this file prints one (best effort: any failure
      is silently ignored);
    - the Python version and platform name;
    - the merged 'proxies' mappings of the handlers in self._opener.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out = sp.communicate()[0]
        out = out.decode().strip()
        # Anchor the pattern so only output that is entirely a hex hash
        # is reported, not arbitrary text starting with a hex character.
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git probe is purely informational.  Catch Exception rather
        # than everything so KeyboardInterrupt/SystemExit still propagate.
        try:
            # Only available on Python 2; raises AttributeError elsewhere.
            sys.exc_clear()
        except Exception:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
972
def _setup_opener(self):
    """Build the URL opener and install it, with the socket timeout, globally.

    Parameters read from self.params:
    - 'socket_timeout': default socket timeout, 600 when unset;
    - 'cookiefile': path of a Mozilla-format cookie file, loaded when it
      is readable; without it a plain in-memory cookie jar is used;
    - 'proxy': '' disables proxies, any other value is used for both
      http and https; when unset the proxies reported by getproxies()
      are used;
    - 'nocheckcertificate': passed to make_HTTPS_handler.

    Sets self.cookiejar and self._opener.
    """
    params = self.params

    raw_timeout = params.get('socket_timeout')
    if raw_timeout is None:
        timeout = 600
    else:
        timeout = float(raw_timeout)

    cookie_path = params.get('cookiefile')
    proxy_opt = params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    skip_cert_check = params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(skip_cert_check)

    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)