]> Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
Merge tag 'upstream/2014.06.19'
[youtubedl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import json
11 import locale
12 import os
13 import platform
14 import re
15 import shutil
16 import subprocess
17 import socket
18 import sys
19 import time
20 import traceback
21
22 if os.name == 'nt':
23 import ctypes
24
25 from .utils import (
26 compat_cookiejar,
27 compat_http_client,
28 compat_str,
29 compat_urllib_error,
30 compat_urllib_request,
31 ContentTooShortError,
32 date_from_str,
33 DateRange,
34 DEFAULT_OUTTMPL,
35 determine_ext,
36 DownloadError,
37 encodeFilename,
38 ExtractorError,
39 format_bytes,
40 formatSeconds,
41 get_term_width,
42 locked_file,
43 make_HTTPS_handler,
44 MaxDownloadsReached,
45 PagedList,
46 PostProcessingError,
47 platform_name,
48 preferredencoding,
49 SameFileError,
50 sanitize_filename,
51 subtitles_filename,
52 takewhile_inclusive,
53 UnavailableVideoError,
54 url_basename,
55 write_json_file,
56 write_string,
57 YoutubeDLHandler,
58 prepend_extension,
59 )
60 from .extractor import get_info_extractor, gen_extractors
61 from .downloader import get_suitable_downloader
62 from .postprocessor import FFmpegMergerPP
63 from .version import __version__
64
65
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for access a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    """

    # Class-level defaults only; every one of these is replaced by a
    # per-instance value in __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
183 def __init__(self, params=None):
184 """Create a FileDownloader object with the given options."""
185 if params is None:
186 params = {}
187 self._ies = []
188 self._ies_instances = {}
189 self._pps = []
190 self._progress_hooks = []
191 self._download_retcode = 0
192 self._num_downloads = 0
193 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
194 self._err_file = sys.stderr
195 self.params = params
196
197 if params.get('bidi_workaround', False):
198 try:
199 import pty
200 master, slave = pty.openpty()
201 width = get_term_width()
202 if width is None:
203 width_args = []
204 else:
205 width_args = ['-w', str(width)]
206 sp_kwargs = dict(
207 stdin=subprocess.PIPE,
208 stdout=slave,
209 stderr=self._err_file)
210 try:
211 self._output_process = subprocess.Popen(
212 ['bidiv'] + width_args, **sp_kwargs
213 )
214 except OSError:
215 self._output_process = subprocess.Popen(
216 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
217 self._output_channel = os.fdopen(master, 'rb')
218 except OSError as ose:
219 if ose.errno == 2:
220 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
221 else:
222 raise
223
224 if (sys.version_info >= (3,) and sys.platform != 'win32' and
225 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
226 and not params['restrictfilenames']):
227 # On Python 3, the Unicode filesystem API will throw errors (#1474)
228 self.report_warning(
229 'Assuming --restrict-filenames since file system encoding '
230 'cannot encode all charactes. '
231 'Set the LC_ALL environment variable to fix this.')
232 self.params['restrictfilenames'] = True
233
234 if '%(stitle)s' in self.params.get('outtmpl', ''):
235 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
236
237 self._setup_opener()
238
239 def add_info_extractor(self, ie):
240 """Add an InfoExtractor object to the end of the list."""
241 self._ies.append(ie)
242 self._ies_instances[ie.ie_key()] = ie
243 ie.set_downloader(self)
244
245 def get_info_extractor(self, ie_key):
246 """
247 Get an instance of an IE with name ie_key, it will try to get one from
248 the _ies list, if there's no instance it will create a new one and add
249 it to the extractor list.
250 """
251 ie = self._ies_instances.get(ie_key)
252 if ie is None:
253 ie = get_info_extractor(ie_key)()
254 self.add_info_extractor(ie)
255 return ie
256
257 def add_default_info_extractors(self):
258 """
259 Add the InfoExtractors returned by gen_extractors to the end of the list
260 """
261 for ie in gen_extractors():
262 self.add_info_extractor(ie)
263
264 def add_post_processor(self, pp):
265 """Add a PostProcessor object to the end of the chain."""
266 self._pps.append(pp)
267 pp.set_downloader(self)
268
269 def add_progress_hook(self, ph):
270 """Add the progress hook (currently only for the file downloader)"""
271 self._progress_hooks.append(ph)
272
273 def _bidi_workaround(self, message):
274 if not hasattr(self, '_output_channel'):
275 return message
276
277 assert hasattr(self, '_output_process')
278 assert type(message) == type('')
279 line_count = message.count('\n') + 1
280 self._output_process.stdin.write((message + '\n').encode('utf-8'))
281 self._output_process.stdin.flush()
282 res = ''.join(self._output_channel.readline().decode('utf-8')
283 for _ in range(line_count))
284 return res[:-len('\n')]
285
286 def to_screen(self, message, skip_eol=False):
287 """Print message to stdout if not in quiet mode."""
288 return self.to_stdout(message, skip_eol, check_quiet=True)
289
    def _write_string(self, s, out=None):
        # Write s to the file-like object out, using the user-selected
        # 'encoding' option (write_string applies its own default when the
        # option is unset).
        write_string(s, out=out, encoding=self.params.get('encoding'))
293 def to_stdout(self, message, skip_eol=False, check_quiet=False):
294 """Print message to stdout if not in quiet mode."""
295 if self.params.get('logger'):
296 self.params['logger'].debug(message)
297 elif not check_quiet or not self.params.get('quiet', False):
298 message = self._bidi_workaround(message)
299 terminator = ['\n', ''][skip_eol]
300 output = message + terminator
301
302 self._write_string(output, self._screen_file)
303
304 def to_stderr(self, message):
305 """Print message to stderr."""
306 assert type(message) == type('')
307 if self.params.get('logger'):
308 self.params['logger'].error(message)
309 else:
310 message = self._bidi_workaround(message)
311 output = message + '\n'
312 self._write_string(output, self._err_file)
313
314 def to_console_title(self, message):
315 if not self.params.get('consoletitle', False):
316 return
317 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
318 # c_wchar_p() might not be necessary if `message` is
319 # already of type unicode()
320 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
321 elif 'TERM' in os.environ:
322 self._write_string('\033]0;%s\007' % message, self._screen_file)
323
324 def save_console_title(self):
325 if not self.params.get('consoletitle', False):
326 return
327 if 'TERM' in os.environ:
328 # Save the title on stack
329 self._write_string('\033[22;0t', self._screen_file)
330
331 def restore_console_title(self):
332 if not self.params.get('consoletitle', False):
333 return
334 if 'TERM' in os.environ:
335 # Restore the title from stack
336 self._write_string('\033[23;0t', self._screen_file)
337
    def __enter__(self):
        # Context-manager entry: remember the console title so that
        # __exit__ can restore it.
        self.save_console_title()
        return self
    def __exit__(self, *args):
        # Context-manager exit: undo save_console_title() and persist
        # cookies to disk when a cookie file was configured.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Exceptions carrying their own exc_info (e.g. wrapped
                    # extractor errors) get the inner traceback printed first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # own exc_info when available so callers see the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
378 def report_warning(self, message):
379 '''
380 Print the message to stderr, it will be prefixed with 'WARNING:'
381 If stderr is a tty file the 'WARNING:' will be colored
382 '''
383 if self.params.get('logger') is not None:
384 self.params['logger'].warning(message)
385 else:
386 if self.params.get('no_warnings'):
387 return
388 if self._err_file.isatty() and os.name != 'nt':
389 _msg_header = '\033[0;33mWARNING:\033[0m'
390 else:
391 _msg_header = 'WARNING:'
392 warning_message = '%s %s' % (_msg_header, message)
393 self.to_stderr(warning_message)
394
395 def report_error(self, message, tb=None):
396 '''
397 Do the same as trouble, but prefixes the message with 'ERROR:', colored
398 in red if stderr is a tty file.
399 '''
400 if self._err_file.isatty() and os.name != 'nt':
401 _msg_header = '\033[0;31mERROR:\033[0m'
402 else:
403 _msg_header = 'ERROR:'
404 error_message = '%s %s' % (_msg_header, message)
405 self.trouble(error_message, tb)
406
407 def report_file_already_downloaded(self, file_name):
408 """Report file has already been fully downloaded."""
409 try:
410 self.to_screen('[download] %s has already been downloaded' % file_name)
411 except UnicodeEncodeError:
412 self.to_screen('[download] The file has already been downloaded')
413
414 def prepare_filename(self, info_dict):
415 """Generate the output filename."""
416 try:
417 template_dict = dict(info_dict)
418
419 template_dict['epoch'] = int(time.time())
420 autonumber_size = self.params.get('autonumber_size')
421 if autonumber_size is None:
422 autonumber_size = 5
423 autonumber_templ = '%0' + str(autonumber_size) + 'd'
424 template_dict['autonumber'] = autonumber_templ % self._num_downloads
425 if template_dict.get('playlist_index') is not None:
426 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
427 if template_dict.get('resolution') is None:
428 if template_dict.get('width') and template_dict.get('height'):
429 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
430 elif template_dict.get('height'):
431 template_dict['resolution'] = '%sp' % template_dict['height']
432 elif template_dict.get('width'):
433 template_dict['resolution'] = '?x%d' % template_dict['width']
434
435 sanitize = lambda k, v: sanitize_filename(
436 compat_str(v),
437 restricted=self.params.get('restrictfilenames'),
438 is_id=(k == 'id'))
439 template_dict = dict((k, sanitize(k, v))
440 for k, v in template_dict.items()
441 if v is not None)
442 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
443
444 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
445 tmpl = os.path.expanduser(outtmpl)
446 filename = tmpl % template_dict
447 return filename
448 except ValueError as err:
449 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
450 return None
451
452 def _match_entry(self, info_dict):
453 """ Returns None iff the file should be downloaded """
454
455 video_title = info_dict.get('title', info_dict.get('id', 'video'))
456 if 'title' in info_dict:
457 # This can happen when we're just evaluating the playlist
458 title = info_dict['title']
459 matchtitle = self.params.get('matchtitle', False)
460 if matchtitle:
461 if not re.search(matchtitle, title, re.IGNORECASE):
462 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
463 rejecttitle = self.params.get('rejecttitle', False)
464 if rejecttitle:
465 if re.search(rejecttitle, title, re.IGNORECASE):
466 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
467 date = info_dict.get('upload_date', None)
468 if date is not None:
469 dateRange = self.params.get('daterange', DateRange())
470 if date not in dateRange:
471 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
472 view_count = info_dict.get('view_count', None)
473 if view_count is not None:
474 min_views = self.params.get('min_views')
475 if min_views is not None and view_count < min_views:
476 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
477 max_views = self.params.get('max_views')
478 if max_views is not None and view_count > max_views:
479 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
480 age_limit = self.params.get('age_limit')
481 if age_limit is not None:
482 if age_limit < info_dict.get('age_limit', 0):
483 return 'Skipping "' + title + '" because it is age restricted'
484 if self.in_download_archive(info_dict):
485 return '%s has already been recorded in archive' % video_title
486 return None
487
488 @staticmethod
489 def add_extra_info(info_dict, extra_info):
490 '''Set the keys from extra_info in info dict if they are missing'''
491 for key, value in extra_info.items():
492 info_dict.setdefault(key, value)
493
494 def extract_info(self, url, download=True, ie_key=None, extra_info={},
495 process=True):
496 '''
497 Returns a list with a dictionary for each video we find.
498 If 'download', also downloads the videos.
499 extra_info is a dict containing the extra values to add to each result
500 '''
501
502 if ie_key:
503 ies = [self.get_info_extractor(ie_key)]
504 else:
505 ies = self._ies
506
507 for ie in ies:
508 if not ie.suitable(url):
509 continue
510
511 if not ie.working():
512 self.report_warning('The program functionality for this site has been marked as broken, '
513 'and will probably not work.')
514
515 try:
516 ie_result = ie.extract(url)
517 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
518 break
519 if isinstance(ie_result, list):
520 # Backwards compatibility: old IE result format
521 ie_result = {
522 '_type': 'compat_list',
523 'entries': ie_result,
524 }
525 self.add_default_extra_info(ie_result, ie, url)
526 if process:
527 return self.process_ie_result(ie_result, download, extra_info)
528 else:
529 return ie_result
530 except ExtractorError as de: # An error we somewhat expected
531 self.report_error(compat_str(de), de.format_traceback())
532 break
533 except MaxDownloadsReached:
534 raise
535 except Exception as e:
536 if self.params.get('ignoreerrors', False):
537 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
538 break
539 else:
540 raise
541 else:
542 self.report_error('no suitable InfoExtractor for URL %s' % url)
543
544 def add_default_extra_info(self, ie_result, ie, url):
545 self.add_extra_info(ie_result, {
546 'extractor': ie.IE_NAME,
547 'webpage_url': url,
548 'webpage_url_basename': url_basename(url),
549 'extractor_key': ie.ie_key(),
550 })
551
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge the embedded extraction into a copy of ie_result:
                # the listed fields always come from embedded_info (or are
                # removed when absent there).
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            # Recurse to resolve whatever type the embedded result has.
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based on the command line, 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            else:
                # Lazily-paged playlists only fetch the requested slice.
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries filtered out by title/date/view/age/archive.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            # Old-style list results: tag each entry, then process it.
            def _fixup(r):
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
667 def select_format(self, format_spec, available_formats):
668 if format_spec == 'best' or format_spec is None:
669 return available_formats[-1]
670 elif format_spec == 'worst':
671 return available_formats[0]
672 elif format_spec == 'bestaudio':
673 audio_formats = [
674 f for f in available_formats
675 if f.get('vcodec') == 'none']
676 if audio_formats:
677 return audio_formats[-1]
678 elif format_spec == 'worstaudio':
679 audio_formats = [
680 f for f in available_formats
681 if f.get('vcodec') == 'none']
682 if audio_formats:
683 return audio_formats[0]
684 elif format_spec == 'bestvideo':
685 video_formats = [
686 f for f in available_formats
687 if f.get('acodec') == 'none']
688 if video_formats:
689 return video_formats[-1]
690 elif format_spec == 'worstvideo':
691 video_formats = [
692 f for f in available_formats
693 if f.get('acodec') == 'none']
694 if video_formats:
695 return video_formats[0]
696 else:
697 extensions = ['mp4', 'flv', 'webm', '3gp']
698 if format_spec in extensions:
699 filter_f = lambda f: f['ext'] == format_spec
700 else:
701 filter_f = lambda f: f['format_id'] == format_spec
702 matches = list(filter(filter_f, available_formats))
703 if matches:
704 return matches[-1]
705 return None
706
    def process_video_result(self, info_dict, download=True):
        """Normalize a single-video info dict, select the requested
        format(s) and, if 'download', hand each selection to process_info.

        Returns info_dict, updated in place with derived fields and the
        chosen format.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            # Sort worst to best so the last entry is the largest one.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            for t in thumbnails:
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD, UTC) from a unix timestamp if needed.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # 'format_limit' cuts the list after (and including) that format_id.
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # Both halves resolved: download both and merge later.
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printing, simulate mode, writing of auxiliary files
        (description, annotations, subtitles, info JSON, thumbnail), the
        actual download (including multi-format download with merging),
        postprocessing and download-archive recording.

        Raises MaxDownloadsReached once --max-downloads is hit.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort before starting yet another download when --max-downloads
        # has been reached.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title around; overly long titles are clipped
        # to 200 characters so they fit into file names.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        # Fall back to the extension when the extractor provided no format.
        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply the user filters (title match, date range, view counts, ...);
        # a non-None reason means the video is skipped.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Create the target directory if it does not exist yet.
        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                # KeyError/TypeError: the extractor provided no description.
                except (KeyError, TypeError):
                    self.report_warning('There\'s no description to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                # KeyError/TypeError: the extractor provided no annotations.
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(info_dict, encodeFilename(infofn))
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                # Derive the thumbnail extension from its URL, default to jpg.
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                else:
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    # Thumbnail failures are non-fatal: warn and carry on.
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    # Pick a suitable FileDownloader per format and attach the
                    # registered progress hooks to it.
                    def dl(name, info):
                        fd = get_suitable_downloader(info)(self, self.params)
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        return fd.download(name, info)
                    if info_dict.get('requested_formats') is not None:
                        # Multiple formats were requested (e.g. bestvideo+bestaudio):
                        # download each into its own 'f<format_id>' file and let
                        # the merger postprocessor combine them afterwards.
                        downloaded = []
                        success = True
                        merger = FFmpegMergerPP(self)
                        if not merger._get_executable():
                            postprocessors = []
                            self.report_warning('You have requested multiple '
                                'formats but ffmpeg or avconv are not installed.'
                                ' The formats won\'t be merged')
                        else:
                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                    else:
                        # Just a single file
                        success = dl(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                    return
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

        self.record_download_archive(info_dict)
1038
1039 def download(self, url_list):
1040 """Download a given list of URLs."""
1041 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1042 if (len(url_list) > 1 and
1043 '%' not in outtmpl
1044 and self.params.get('max_downloads') != 1):
1045 raise SameFileError(outtmpl)
1046
1047 for url in url_list:
1048 try:
1049 #It also downloads the videos
1050 self.extract_info(url)
1051 except UnavailableVideoError:
1052 self.report_error('unable to download video')
1053 except MaxDownloadsReached:
1054 self.to_screen('[info] Maximum number of downloaded files reached.')
1055 raise
1056
1057 return self._download_retcode
1058
1059 def download_with_info_file(self, info_filename):
1060 with io.open(info_filename, 'r', encoding='utf-8') as f:
1061 info = json.load(f)
1062 try:
1063 self.process_ie_result(info, download=True)
1064 except DownloadError:
1065 webpage_url = info.get('webpage_url')
1066 if webpage_url is not None:
1067 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1068 return self.download([webpage_url])
1069 else:
1070 raise
1071 return self._download_retcode
1072
1073 def post_process(self, filename, ie_info):
1074 """Run all the postprocessors on the given file."""
1075 info = dict(ie_info)
1076 info['filepath'] = filename
1077 keep_video = None
1078 pps_chain = []
1079 if ie_info.get('__postprocessors') is not None:
1080 pps_chain.extend(ie_info['__postprocessors'])
1081 pps_chain.extend(self._pps)
1082 for pp in pps_chain:
1083 try:
1084 keep_video_wish, new_info = pp.run(info)
1085 if keep_video_wish is not None:
1086 if keep_video_wish:
1087 keep_video = keep_video_wish
1088 elif keep_video is None:
1089 # No clear decision yet, let IE decide
1090 keep_video = keep_video_wish
1091 except PostProcessingError as e:
1092 self.report_error(e.msg)
1093 if keep_video is False and not self.params.get('keepvideo', False):
1094 try:
1095 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1096 os.remove(encodeFilename(filename))
1097 except (IOError, OSError):
1098 self.report_warning('Unable to remove downloaded video file')
1099
1100 def _make_archive_id(self, info_dict):
1101 # Future-proof against any change in case
1102 # and backwards compatibility with prior versions
1103 extractor = info_dict.get('extractor_key')
1104 if extractor is None:
1105 if 'id' in info_dict:
1106 extractor = info_dict.get('ie_key') # key in a playlist
1107 if extractor is None:
1108 return None # Incomplete video information
1109 return extractor.lower() + ' ' + info_dict['id']
1110
1111 def in_download_archive(self, info_dict):
1112 fn = self.params.get('download_archive')
1113 if fn is None:
1114 return False
1115
1116 vid_id = self._make_archive_id(info_dict)
1117 if vid_id is None:
1118 return False # Incomplete video information
1119
1120 try:
1121 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1122 for line in archive_file:
1123 if line.strip() == vid_id:
1124 return True
1125 except IOError as ioe:
1126 if ioe.errno != errno.ENOENT:
1127 raise
1128 return False
1129
1130 def record_download_archive(self, info_dict):
1131 fn = self.params.get('download_archive')
1132 if fn is None:
1133 return
1134 vid_id = self._make_archive_id(info_dict)
1135 assert vid_id
1136 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1137 archive_file.write(vid_id + '\n')
1138
1139 @staticmethod
1140 def format_resolution(format, default='unknown'):
1141 if format.get('vcodec') == 'none':
1142 return 'audio only'
1143 if format.get('resolution') is not None:
1144 return format['resolution']
1145 if format.get('height') is not None:
1146 if format.get('width') is not None:
1147 res = '%sx%s' % (format['width'], format['height'])
1148 else:
1149 res = '%sp' % format['height']
1150 elif format.get('width') is not None:
1151 res = '?x%d' % format['width']
1152 else:
1153 res = default
1154 return res
1155
    def _format_note(self, fdict):
        """Return the human-readable 'note' string for a format dict.

        Accumulates container, video/audio codec, bitrates, sample rate and
        filesize into one comma-separated annotation used by --list-formats.
        The concatenation order below is deliberate; the ', ' separator is
        only inserted when something was already emitted.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # Total bitrate, right-aligned to 4 digits.
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # The video bitrate value itself is appended further below.
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but codec unknown: label the video bitrate.
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            # Audio bitrate, 3-digit aligned.
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # Audio sample rate in Hz.
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        return res
1199
1200 def list_formats(self, info_dict):
1201 def line(format, idlen=20):
1202 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1203 format['format_id'],
1204 format['ext'],
1205 self.format_resolution(format),
1206 self._format_note(format),
1207 ))
1208
1209 formats = info_dict.get('formats', [info_dict])
1210 idlen = max(len('format code'),
1211 max(len(f['format_id']) for f in formats))
1212 formats_s = [line(f, idlen) for f in formats]
1213 if len(formats) > 1:
1214 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1215 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1216
1217 header_line = line({
1218 'format_id': 'format code', 'ext': 'extension',
1219 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1220 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1221 (info_dict['id'], header_line, '\n'.join(formats_s)))
1222
1223 def urlopen(self, req):
1224 """ Start an HTTP download """
1225 return self._opener.open(req, timeout=self._socket_timeout)
1226
1227 def print_debug_header(self):
1228 if not self.params.get('verbose'):
1229 return
1230
1231 write_string(
1232 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1233 locale.getpreferredencoding(),
1234 sys.getfilesystemencoding(),
1235 sys.stdout.encoding,
1236 self.get_encoding()),
1237 encoding=None
1238 )
1239
1240 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1241 try:
1242 sp = subprocess.Popen(
1243 ['git', 'rev-parse', '--short', 'HEAD'],
1244 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1245 cwd=os.path.dirname(os.path.abspath(__file__)))
1246 out, err = sp.communicate()
1247 out = out.decode().strip()
1248 if re.match('[0-9a-f]+', out):
1249 self._write_string('[debug] Git HEAD: ' + out + '\n')
1250 except:
1251 try:
1252 sys.exc_clear()
1253 except:
1254 pass
1255 self._write_string('[debug] Python version %s - %s' %
1256 (platform.python_version(), platform_name()) + '\n')
1257
1258 proxy_map = {}
1259 for handler in self._opener.handlers:
1260 if hasattr(handler, 'proxies'):
1261 proxy_map.update(handler.proxies)
1262 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1263
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Wires up cookie handling, proxy configuration and the custom HTTPS
        and YoutubeDL handlers, storing the result in self._opener and the
        effective timeout in self._socket_timeout.
        """
        # Default socket timeout of 600 seconds unless overridden.
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            # Persistent Mozilla-format cookie jar; only loaded when readable.
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment (http_proxy & co.).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        # debug_printtraffic enables http.client-level wire logging.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1304
1305 def encode(self, s):
1306 if isinstance(s, bytes):
1307 return s # Already encoded
1308
1309 try:
1310 return s.encode(self.get_encoding())
1311 except UnicodeEncodeError as err:
1312 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1313 raise
1314
1315 def get_encoding(self):
1316 encoding = self.params.get('encoding')
1317 if encoding is None:
1318 encoding = preferredencoding()
1319 return encoding