Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
Imported Upstream version 2013.11.11
[youtubedl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import os
9 import re
10 import shutil
11 import socket
12 import sys
13 import time
14 import traceback
15
16 from .utils import *
17 from .extractor import get_info_extractor, gen_extractors
18 from .FileDownloader import FileDownloader
19
20
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing the format.
    simulate:          Do not download the video files.
    format:            Video format code ("best", "worst", "all", an
                       extension, a format id, or several of those
                       separated by "/").
    format_limit:      Highest quality format to try.
    prefer_free_formats: Sort formats so that free containers are preferred
                       among formats of the same height and width.
    listformats:       Print the available formats and do not download.
    outtmpl:           Template for output names.
    autonumber_size:   Number of digits of %(autonumber)s (default 5).
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    max_downloads:     Abort after this many files have been processed.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Class-level placeholders; __init__ rebinds every one of them per
    # instance, so the mutable lists below are never shared in practice.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a YoutubeDL object with the given options.

        params is the options dictionary described in the class docstring.
        It is stored (not copied) and may be modified: 'restrictfilenames'
        is forced to True when the file system encoding is ASCII-only.
        """
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                u'Assuming --restrict-filenames since file system encoding '
                u'cannot encode all charactes. '
                u'Set the LC_ALL environment variable to fix this.')
            params['restrictfilenames'] = True

        self.params = params
        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params.get('outtmpl', u''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file if not in quiet mode.

        A newline is appended unless skip_eol is true.
        """
        if not self.params.get('quiet', False):
            terminator = u'' if skip_eol else u'\n'
            output = message + terminator
            write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message (a unicode string) followed by a newline to stderr."""
        assert isinstance(message, type(u''))
        output = message + u'\n'
        # The stream that is inspected must be the one that is written to.
        # Python 2 lies about the mode of sys.stdout/sys.stderr
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def fixed_template(self):
        """Checks if the output template is fixed (has no %(...)s fields)."""
        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless the 'ignoreerrors' option is set, in
        which case the return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    # Exceptions that wrap another one carry it in .exc_info
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;31mERROR:\033[0m'
        else:
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file is being written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_writeannotations(self, annofn):
        """ Report that the annotations file is being written. """
        self.to_screen(u'[info] Writing video annotations to: ' + annofn)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # The name cannot be printed on this terminal; fall back to a
            # generic message.
            self.to_screen(u'[download] The file has already been downloaded')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the 'outtmpl' option with the (sanitized) fields of
        info_dict plus 'epoch' and 'autonumber'. Returns None, after
        reporting an error, if the template cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            def sanitize(k, v):
                # Missing values are rendered as "NA"
                return sanitize_filename(
                    u'NA' if v is None else compat_str(v),
                    restricted=self.params.get('restrictfilenames'),
                    is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items())

            tmpl = os.path.expanduser(self.params['outtmpl'])
            filename = tmpl % template_dict
            return filename
        except KeyError:
            self.report_error(u'Erroneous output template')
            return None
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded

        Otherwise returns the reason (a string) why it is skipped. The
        reason carries no '[download] ' prefix: process_info adds it when
        printing.
        """

        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return (u'%(title)s has already been recorded in archive'
                    % info_dict)
        return None

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None):
        '''
        Extract the information of url with the first suitable InfoExtractor
        (or the one named ie_key) and return the processed result, as
        returned by process_ie_result. Returns None if no extractor is
        suitable or the extraction failed and errors are ignored.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        if extra_info is None:
            extra_info = {}

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_extra_info(ie_result,
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
                        'extractor_key': ie.ie_key(),
                    })
                return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop ended without any extractor accepting the URL
            self.report_error(u'no suitable InfoExtractor: %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Raises Exception if the '_type' of ie_result is not known.
        """
        if extra_info is None:
            extra_info = {}

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            # The download flag must be forwarded, otherwise download=False
            # would still download single videos.
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'playlist':
            self.add_extra_info(ie_result, extra_info)
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            # playliststart is 1-based, slices are 0-based
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                }
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def select_format(self, format_spec, available_formats):
        """Pick one format out of available_formats (ordered worst to best).

        format_spec is 'best' (or None), 'worst', a file extension or a
        format id. Returns the matching format dict with the highest
        quality, or None if nothing matches (including when
        available_formats is empty).
        """
        if not available_formats:
            return None
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        else:
            extensions = [u'mp4', u'flv', u'webm', u'3gp']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
            else:
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
            if matches:
                return matches[-1]
        return None

    def process_video_result(self, info_dict, download=True):
        """Select the format(s) of a single video and, if 'download', process them.

        info_dict is updated in place with the last selected format and
        returned. Returns None when the 'listformats' option is set (the
        formats are only listed).

        Raises ExtractorError if none of the requested formats is available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # This extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, fmt) in enumerate(formats):
            if fmt.get('format_id') is None:
                fmt['format_id'] = compat_str(i)
            if fmt.get('format') is None:
                fmt['format'] = u'{id} - {res}{note}'.format(
                    id=fmt['format_id'],
                    res=self.format_resolution(fmt),
                    note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in fmt:
                fmt['ext'] = determine_ext(fmt['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Drop everything that is better than the limit
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                try:
                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                except ValueError:
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict

    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Prints the forced fields, writes the requested side files
        (description, annotations, subtitles, info JSON, thumbnail),
        downloads the video, runs the post processors and records the
        video in the download archive.

        Raises MaxDownloadsReached when the 'max_downloads' option is
        exceeded and UnavailableVideoError on OSError/IOError during the
        download.
        """

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            try:
                descfn = filename + u'.description'
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a None description
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        if self.params.get('writeannotations', False):
            try:
                annofn = filename + u'.annotations.xml'
                self.report_writeannotations(annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)
                return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                # Computed outside the try block so that the error message
                # below can always name the file.
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                try:
                    self.report_writesubtitles(sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
                except (OSError, IOError):
                    self.report_error(u'Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # 'urlhandle' is left out of the JSON output
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                    try:
                        # Encode the name like every other file written here
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        uf.close()
                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail does not abort the download
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self.fd._do_download(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

        self.record_download_archive(info_dict)

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the return code (0, or 1 if an ignored error happened).
        Raises SameFileError if several URLs would be written to the same
        fixed file name; MaxDownloadsReached is reported and re-raised.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error(u'unable to download video')
            except MaxDownloadsReached:
                self.to_screen(u'[info] Maximum number of downloaded files reached.')
                raise

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        The original file is deleted afterwards if the post processors
        asked for it and the 'keepvideo' option is not set.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                keep_video_wish, _ = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A wish to keep the file always wins
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')

    def in_download_archive(self, info_dict):
        """Return True iff the video is listed in the 'download_archive' file.

        Returns False if the option is not set or the file does not exist.
        """
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
                        return True
        except IOError as ioe:
            # A missing archive simply means nothing has been recorded yet
            if ioe.errno != errno.ENOENT:
                raise
        return False

    def record_download_archive(self, info_dict):
        """Append the video to the 'download_archive' file, if that option is set."""
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')

    @staticmethod
    def format_resolution(format, default='unknown'):
        """Return a printable resolution for the given format dict.

        Uses '_resolution' if set, else 'WIDTHxHEIGHT', else 'HEIGHTp',
        else default.
        """
        if format.get('_resolution') is not None:
            return format['_resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = u'%sx%s' % (format['width'], format['height'])
            else:
                res = u'%sp' % format['height']
        else:
            res = default
        return res

    def list_formats(self, info_dict):
        """Print a table of the formats available for the video."""
        def line(fmt):
            return (u'%-20s%-10s%-12s%s' % (
                fmt['format_id'],
                fmt['ext'],
                self.format_resolution(fmt),
                fmt.get('format_note', ''),
                )
            )

        # 'formats' may be missing or None; both mean that the video itself
        # is the only format (same rule as in process_video_result).
        formats = info_dict.get('formats')
        if formats is None:
            formats = [info_dict]
        formats_s = list(map(line, formats))
        if len(formats) > 1:
            formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
            formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'})
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))