]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
debian/changelog: Annotate bugs being closed.
[youtubedl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import json
11 import locale
12 import os
13 import platform
14 import re
15 import shutil
16 import subprocess
17 import socket
18 import sys
19 import time
20 import traceback
21
22 if os.name == 'nt':
23 import ctypes
24
25 from .utils import (
26 compat_cookiejar,
27 compat_expanduser,
28 compat_http_client,
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
32 escape_url,
33 ContentTooShortError,
34 date_from_str,
35 DateRange,
36 DEFAULT_OUTTMPL,
37 determine_ext,
38 DownloadError,
39 encodeFilename,
40 ExtractorError,
41 format_bytes,
42 formatSeconds,
43 get_term_width,
44 locked_file,
45 make_HTTPS_handler,
46 MaxDownloadsReached,
47 PagedList,
48 PostProcessingError,
49 platform_name,
50 preferredencoding,
51 SameFileError,
52 sanitize_filename,
53 subtitles_filename,
54 takewhile_inclusive,
55 UnavailableVideoError,
56 url_basename,
57 write_json_file,
58 write_string,
59 YoutubeDLHandler,
60 prepend_extension,
61 )
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
66 from .version import __version__
67
68
69 class YoutubeDL(object):
70 """YoutubeDL class.
71
72 YoutubeDL objects are the ones responsible for downloading the
73 actual video file and writing it to disk if the user has requested
74 it, among some other tasks. In most cases there should be one per
75 program. As, given a video URL, the downloader doesn't know how to
76 extract all the needed information, task that InfoExtractors do, it
77 has to pass the URL to one of them.
78
79 For this, YoutubeDL objects have a method that allows
80 InfoExtractors to be registered in a given order. When it is passed
81 a URL, the YoutubeDL object hands it to the first InfoExtractor it
82 finds that reports being able to handle it. The InfoExtractor extracts
83 all the information about the video or videos the URL refers to, and
84 YoutubeDL processes the extracted information, possibly using a File
85 Downloader to download the video.
86
87 YoutubeDL objects accept a lot of parameters. In order not to saturate
88 the object constructor with arguments, it receives a dictionary of
89 options instead. These options are available through the params
90 attribute for the InfoExtractors to use. The YoutubeDL also
91 registers itself as the downloader in charge for the InfoExtractors
92 that are added to it, so this is a "mutual registration".
93
94 Available options:
95
96 username: Username for authentication purposes.
97 password: Password for authentication purposes.
98 videopassword: Password for accessing a video.
99 usenetrc: Use netrc for authentication instead.
100 verbose: Print additional info to stdout.
101 quiet: Do not print messages to stdout.
102 no_warnings: Do not print out anything for warnings.
103 forceurl: Force printing final URL.
104 forcetitle: Force printing title.
105 forceid: Force printing ID.
106 forcethumbnail: Force printing thumbnail URL.
107 forcedescription: Force printing description.
108 forcefilename: Force printing final filename.
109 forceduration: Force printing duration.
110 forcejson: Force printing info_dict as JSON.
111 dump_single_json: Force printing the info_dict of the whole playlist
112 (or video) as a single JSON line.
113 simulate: Do not download the video files.
114 format: Video format code.
115 format_limit: Highest quality format to try.
116 outtmpl: Template for output names.
117 restrictfilenames: Do not allow "&" and spaces in file names
118 ignoreerrors: Do not stop on download errors.
119 nooverwrites: Prevent overwriting files.
120 playliststart: Playlist item to start at.
121 playlistend: Playlist item to end at.
122 matchtitle: Download only matching titles.
123 rejecttitle: Reject downloads for matching titles.
124 logger: Log messages to a logging.Logger instance.
125 logtostderr: Log messages to stderr instead of stdout.
126 writedescription: Write the video description to a .description file
127 writeinfojson: Write the video description to a .info.json file
128 writeannotations: Write the video annotations to a .annotations.xml file
129 writethumbnail: Write the thumbnail image to a file
130 writesubtitles: Write the video subtitles to a file
131 writeautomaticsub: Write the automatic subtitles to a file
132 allsubtitles: Downloads all the subtitles of the video
133 (requires writesubtitles or writeautomaticsub)
134 listsubtitles: Lists all available subtitles for the video
135 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
136 subtitleslangs: List of languages of the subtitles to download
137 keepvideo: Keep the video file after post-processing
138 daterange: A DateRange object, download only if the upload_date is in the range.
139 skip_download: Skip the actual download of the video file
140 cachedir: Location of the cache files in the filesystem.
141 False to disable filesystem cache.
142 noplaylist: Download single video instead of a playlist if in doubt.
143 age_limit: An integer representing the user's age in years.
144 Unsuitable videos for the given age are skipped.
145 min_views: An integer representing the minimum view count the video
146 must have in order to not be skipped.
147 Videos without view count information are always
148 downloaded. None for no limit.
149 max_views: An integer representing the maximum view count.
150 Videos that are more popular than that are not
151 downloaded.
152 Videos without view count information are always
153 downloaded. None for no limit.
154 download_archive: File name of a file where all downloads are recorded.
155 Videos already present in the file are not downloaded
156 again.
157 cookiefile: File name where cookies should be read from and dumped to.
158 nocheckcertificate:Do not verify SSL certificates
159 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
160 At the moment, this is only supported by YouTube.
161 proxy: URL of the proxy server to use
162 socket_timeout: Time to wait for unresponsive hosts, in seconds
163 bidi_workaround: Work around buggy terminals without bidirectional text
164 support, using fribidi
165 debug_printtraffic:Print out sent and received HTTP traffic
166 include_ads: Download ads as well
167 default_search: Prepend this string if an input url is not valid.
168 'auto' for elaborate guessing
169 encoding: Use this encoding instead of the system-specified.
170 extract_flat: Do not resolve URLs, return the immediate result.
171 Pass in 'in_playlist' to only show this behavior for
172 playlist items.
173
174 The following parameters are not used by YoutubeDL itself, they are used by
175 the FileDownloader:
176 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
177 noresizebuffer, retries, continuedl, noprogress, consoletitle
178
179 The following options are used by the post processors:
180 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
181 otherwise prefer avconv.
182 exec_cmd: Arbitrary command to run after downloading
183 """
184
# Class-level defaults. __init__ rebinds every one of these names on the
# instance, so these values are only visible before __init__ has run.
params = None  # options dictionary
_ies = []  # registered InfoExtractor objects
_pps = []  # registered PostProcessor objects
_download_retcode = None  # __init__ sets it to 0; trouble() sets it to 1
_num_downloads = None  # __init__ sets it to 0
_screen_file = None  # sys.stdout or sys.stderr, chosen in __init__
191
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object configured by the params dictionary.

    params maps option names to values (see the class docstring); None is
    treated as an empty dictionary.  When auto_init is true, the debug
    header is printed and the default InfoExtractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Index 0 (stdout) unless 'logtostderr' is set, in which case index 1.
    screen_candidates = (sys.stdout, sys.stderr)
    self._screen_file = screen_candidates[params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master_fd, slave_fd = pty.openpty()
            term_width = get_term_width()
            width_args = [] if term_width is None else ['-w', str(term_width)]
            popen_options = dict(
                stdin=subprocess.PIPE,
                stdout=slave_fd,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be run.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **popen_options)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **popen_options)
            self._output_channel = os.fdopen(master_fd, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; anything else propagates.
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    ascii_fs_encodings = ['ascii', 'ANSI_X3.4-1968']
    if (sys.version_info >= (3,)
            and sys.platform != 'win32'
            and sys.getfilesystemencoding() in ascii_fs_encodings
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    output_template = self.params.get('outtmpl', '')
    if '%(stitle)s' in output_template:
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()
252
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its key
    and make this object its downloader."""
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
258
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.  If none has been
    registered yet, instantiate the class looked up by ie_key, register it
    through add_info_extractor and return the new instance.
    """
    existing = self._ies_instances.get(ie_key)
    if existing is not None:
        return existing
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
270
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors, in order.
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
277
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
282
def add_progress_hook(self, ph):
    """Register the progress hook ph (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
286
def _bidi_workaround(self, message):
    """Pass message through the external bidi process, if one was started.

    Without an output channel the message is returned untouched.  Otherwise
    the message is written to the helper's stdin and as many lines as were
    sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = 1 + message.count('\n')
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    # Drop the single newline that was appended before sending.
    return ''.join(pieces)[:-1]
299
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout, honouring quiet mode."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
303
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
306
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' option is set, the message goes to its debug() method and
    nothing is written.  Otherwise the message is written unless check_quiet
    is true and the 'quiet' option is set.  skip_eol suppresses the trailing
    newline.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    text = self._bidi_workaround(message)
    line_end = ('\n', '')[skip_eol]
    self._write_string(text + line_end, self._screen_file)
317
def to_stderr(self, message):
    """Send message to the logger's error() if one is set, else to the error file."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = self._bidi_workaround(message)
    self._write_string(text + '\n', self._err_file)
327
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on.

    On Windows with a console window the kernel32 API is used; elsewhere an
    escape sequence is written to the screen file if TERM is set.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title_arg = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title_arg)
        return
    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
337
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
344
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
351
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    context_value = self
    return context_value
355
def __exit__(self, *args):
    """Leave the context: restore the console title and, when a
    'cookiefile' option is set, save the cookie jar."""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
361
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message, if any, is printed to stderr.  In verbose mode a traceback
    is printed as well: tb if given, otherwise one built from the exception
    being handled (or from the current stack when there is none).

    Unless the 'ignoreerrors' option is set, a DownloadError carrying the
    message and the relevant exc_info is raised.  Otherwise the download
    return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                active = sys.exc_info()[1]
                parts = []
                # Some exceptions carry the exc_info of an inner exception.
                if hasattr(active, 'exc_info') and active.exc_info[0]:
                    parts.append(''.join(traceback.format_exception(*active.exc_info)))
                parts.append(compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = sys.exc_info()
        if exc_info[0] and hasattr(exc_info[1], 'exc_info') and exc_info[1].exc_info[0]:
            exc_info = exc_info[1].exc_info
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
391
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' option the message goes to its
    warning() method.  Otherwise, unless 'no_warnings' is set, it is
    printed to stderr prefixed with 'WARNING:', which is colored when the
    error file is a tty and the OS is not Windows.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colorize = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
408
def report_error(self, message, tb=None):
    '''
    Call trouble with the message prefixed by 'ERROR:'.  The prefix is
    colored red when the error file is a tty and the OS is not Windows.
    '''
    colorize = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
420
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    Falls back to a message without the name if printing it raises
    UnicodeEncodeError.
    """
    named_message = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
427
def prepare_filename(self, info_dict):
    """Build the output filename for info_dict from the 'outtmpl' option.

    A copy of info_dict is extended with 'epoch', 'autonumber', a
    zero-padded 'playlist_index' and a derived 'resolution'.  Every value
    that is not None is sanitized, and fields missing from the template
    data expand to 'NA'.  Returns None after reporting an error if a
    ValueError is raised along the way.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Pad the index to the width of the total entry count.
            pad = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad, fields['playlist_index'])
        if fields.get('resolution') is None:
            width, height = fields.get('width'), fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')
        sanitized = collections.defaultdict(lambda: 'NA')
        for key, value in fields.items():
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == 'id'))

        template = compat_expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        return template % sanitized
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
465
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a string explaining why the entry is skipped.  The
    checks, in order: 'matchtitle' / 'rejecttitle' patterns (only when a
    title is present), 'daterange', 'min_views' / 'max_views', 'age_limit'
    and the download archive.
    """

    # Name used in messages; falls back to the id when there is no title.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        actual_age_limit = info_dict.get('age_limit')
        if actual_age_limit is None:
            actual_age_limit = 0
        if age_limit < actual_age_limit:
            # video_title rather than title: title is unbound when the
            # entry has no 'title' key.
            return 'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
504
505 @staticmethod
506 def add_extra_info(info_dict, extra_info):
507 '''Set the keys from extra_info in info dict if they are missing'''
508 for key, value in extra_info.items():
509 info_dict.setdefault(key, value)
510
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url using the first suitable InfoExtractor.

    If ie_key is given, only that extractor is tried; otherwise the
    registered extractors are tried in order.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    (None means no extra values).
    If 'process' is false, the raw extractor result is returned without
    being resolved by process_ie_result.

    Returns the result dictionary, or None when extraction already
    finished, an error was reported, or no extractor is suitable.
    '''
    # Avoid a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without a break: no extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
560
def add_default_extra_info(self, ie_result, ie, url):
    """Fill ie_result with the extractor name and key and the page URL and
    its basename, for whichever of those keys are still missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
568
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of extra values to add to the results (None means
    no extra values).
    Returns the resolved ie_result.  For a 'url_transparent' result whose
    embedded extraction yields nothing, that empty value is returned.
    Raises Exception for an unknown '_type'.
    """
    # Avoid a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: hand back the unresolved reference as is.
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when it reported an error or found no
        # suitable extractor; there is nothing to merge in that case.
        if not info:
            return info

        def make_result(embedded_info):
            # Start from the embedding result, but take these fields only
            # from the embedded one (dropping them if it lacks them).
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is 1-based; convert it to a 0-based slice start.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Propagate the extractor and page information to each entry.
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
693
def select_format(self, format_spec, available_formats):
    """Pick the entry of available_formats designated by format_spec.

    'best' (or None) and 'worst' select the last and first entry.  The
    best/worst audio and video specs select among the entries whose
    'vcodec' (audio) or 'acodec' (video) is 'none'.  Any other spec is
    matched against 'ext' if it is one of the known extensions, otherwise
    against 'format_id'; the last match wins.  Returns None when nothing
    matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # (spec, codec field that must be 'none', index of the entry to pick)
    codec_specs = (
        ('bestaudio', 'vcodec', -1),
        ('worstaudio', 'vcodec', 0),
        ('bestvideo', 'acodec', -1),
        ('worstvideo', 'acodec', 0),
    )
    for spec_name, absent_codec, pick in codec_specs:
        if format_spec == spec_name:
            candidates = [
                f for f in available_formats
                if f.get(absent_codec) == 'none']
            return candidates[pick] if candidates else None

    known_extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
    field = 'ext' if format_spec in known_extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    return matches[-1] if matches else None
733
734 def process_video_result(self, info_dict, download=True):
735 assert info_dict.get('_type', 'video') == 'video'
736
737 if 'id' not in info_dict:
738 raise ExtractorError('Missing "id" field in extractor result')
739 if 'title' not in info_dict:
740 raise ExtractorError('Missing "title" field in extractor result')
741
742 if 'playlist' not in info_dict:
743 # It isn't part of a playlist
744 info_dict['playlist'] = None
745 info_dict['playlist_index'] = None
746
747 thumbnails = info_dict.get('thumbnails')
748 if thumbnails:
749 thumbnails.sort(key=lambda t: (
750 t.get('width'), t.get('height'), t.get('url')))
751 for t in thumbnails:
752 if 'width' in t and 'height' in t:
753 t['resolution'] = '%dx%d' % (t['width'], t['height'])
754
755 if thumbnails and 'thumbnail' not in info_dict:
756 info_dict['thumbnail'] = thumbnails[-1]['url']
757
758 if 'display_id' not in info_dict and 'id' in info_dict:
759 info_dict['display_id'] = info_dict['id']
760
761 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
762 upload_date = datetime.datetime.utcfromtimestamp(
763 info_dict['timestamp'])
764 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
765
766 # This extractors handle format selection themselves
767 if info_dict['extractor'] in ['Youku']:
768 if download:
769 self.process_info(info_dict)
770 return info_dict
771
772 # We now pick which formats have to be downloaded
773 if info_dict.get('formats') is None:
774 # There's only one format available
775 formats = [info_dict]
776 else:
777 formats = info_dict['formats']
778
779 if not formats:
780 raise ExtractorError('No video formats found!')
781
782 # We check that all the formats have the format and format_id fields
783 for i, format in enumerate(formats):
784 if 'url' not in format:
785 raise ExtractorError('Missing "url" key in result (index %d)' % i)
786
787 if format.get('format_id') is None:
788 format['format_id'] = compat_str(i)
789 if format.get('format') is None:
790 format['format'] = '{id} - {res}{note}'.format(
791 id=format['format_id'],
792 res=self.format_resolution(format),
793 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
794 )
795 # Automatically determine file extension if missing
796 if 'ext' not in format:
797 format['ext'] = determine_ext(format['url']).lower()
798
799 format_limit = self.params.get('format_limit', None)
800 if format_limit:
801 formats = list(takewhile_inclusive(
802 lambda f: f['format_id'] != format_limit, formats
803 ))
804
805 # TODO Central sorting goes here
806
807 if formats[0] is not info_dict:
808 # only set the 'formats' fields if the original info_dict list them
809 # otherwise we end up with a circular reference, the first (and unique)
810 # element in the 'formats' field in info_dict is info_dict itself,
811 # wich can't be exported to json
812 info_dict['formats'] = formats
813 if self.params.get('listformats', None):
814 self.list_formats(info_dict)
815 return
816
817 req_format = self.params.get('format')
818 if req_format is None:
819 req_format = 'best'
820 formats_to_download = []
821 # The -1 is for supporting YoutubeIE
822 if req_format in ('-1', 'all'):
823 formats_to_download = formats
824 else:
825 for rfstr in req_format.split(','):
826 # We can accept formats requested in the format: 34/5/best, we pick
827 # the first that is available, starting from left
828 req_formats = rfstr.split('/')
829 for rf in req_formats:
830 if re.match(r'.+?\+.+?', rf) is not None:
831 # Two formats have been requested like '137+139'
832 format_1, format_2 = rf.split('+')
833 formats_info = (self.select_format(format_1, formats),
834 self.select_format(format_2, formats))
835 if all(formats_info):
836 selected_format = {
837 'requested_formats': formats_info,
838 'format': rf,
839 'ext': formats_info[0]['ext'],
840 }
841 else:
842 selected_format = None
843 else:
844 selected_format = self.select_format(rf, formats)
845 if selected_format is not None:
846 formats_to_download.append(selected_format)
847 break
848 if not formats_to_download:
849 raise ExtractorError('requested format not available',
850 expected=True)
851
852 if download:
853 if len(formats_to_download) > 1:
854 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
855 for format in formats_to_download:
856 new_info = dict(info_dict)
857 new_info.update(format)
858 self.process_info(new_info)
859 # We update the info dict with the best quality format (backwards compatibility)
860 info_dict.update(formats_to_download[-1])
861 return info_dict
862
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Steps, in order: enforce the 'max_downloads' limit, normalise the
    title/format fields of info_dict (mutated in place), apply the
    match filter, perform the "forced" printings, and - unless the
    'simulate' param is set - write the requested side files
    (description, annotations, subtitles, info JSON, thumbnail),
    download the media, run the postprocessors and record the video in
    the download archive.

    Raises MaxDownloadsReached when the download limit has been hit and
    UnavailableVideoError when the download fails with an OS/IO error.
    Most other failures are reported through report_error() /
    report_warning() and make the method return early.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around; 'title' itself is capped at 200
    # characters (197 + '...').
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a None description
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing key or None annotations
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    try:
                        # Encode the name like every other file written by
                        # this method, so the path that is checked for
                        # existence above is the path that gets opened.
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        # Do not leak the HTTP response object
                        uf.close()
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal for the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The file is already there: treat it as downloaded
            success = True
        else:
            try:
                def dl(name, info):
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Several formats were requested at once (e.g. '137+139'):
                    # download each one to its own file and let the merger
                    # postprocessor combine them afterwards.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
1070
def download(self, url_list):
    """Download every URL in url_list and return the download retcode.

    Raises SameFileError when several URLs would be written to one fixed
    output template (no '%' field in it) and 'max_downloads' is not 1.
    MaxDownloadsReached is announced on screen and re-raised.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    template_is_fixed = '%' not in outtmpl
    if len(url_list) > 1 and template_is_fixed and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # extract_info() also performs the actual download
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1093
def download_with_info_file(self, info_filename):
    """Download using a previously written info JSON file.

    The file is read as UTF-8 JSON and handed to process_ie_result().
    If that raises DownloadError and the info contains a 'webpage_url',
    a warning is issued and the download is retried from that URL;
    without a 'webpage_url' the error is re-raised.
    Returns the download retcode.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1107
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is made of the postprocessors attached to ie_info under
    '__postprocessors' (if any) followed by the ones registered on this
    object. Each one receives an info dict whose 'filepath' entry names
    the file to work on; the info dict returned by one postprocessor is
    handed to the next one. A PostProcessingError is reported and the
    chain continues with the next postprocessor.

    The original file is deleted afterwards only if at least one
    postprocessor asked for it, none asked to keep it, and the
    'keepvideo' param is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        # Propagate the (possibly replaced) info dict so that the next
        # postprocessor sees what the previous one produced.
        if new_info is not None:
            info = new_info
        if keep_video_wish is not None:
            if keep_video_wish:
                # A wish to keep the file always wins
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1134
1135 def _make_archive_id(self, info_dict):
1136 # Future-proof against any change in case
1137 # and backwards compatibility with prior versions
1138 extractor = info_dict.get('extractor_key')
1139 if extractor is None:
1140 if 'id' in info_dict:
1141 extractor = info_dict.get('ie_key') # key in a playlist
1142 if extractor is None:
1143 return None # Incomplete video information
1144 return extractor.lower() + ' ' + info_dict['id']
1145
def in_download_archive(self, info_dict):
    """Tell whether the video in info_dict is listed in the download archive.

    Returns False when no 'download_archive' param is set, when the
    video information is incomplete, or when the archive file does not
    exist. Any other IOError is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == wanted for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1164
def record_download_archive(self, info_dict):
    """Append the video in info_dict to the download archive file.

    Does nothing when no 'download_archive' param is set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    entry = self._make_archive_id(info_dict)
    assert entry
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + '\n')
1173
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human readable resolution string for a format dict.

    In order of precedence:
      * 'audio only' when the format has no video ('vcodec' is 'none'),
      * the explicit 'resolution' entry,
      * '<width>x<height>' when both dimensions are known,
      * '<height>p' when only the height is known,
      * '<width>x?' when only the width is known,
      * default otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Width comes first in WIDTHxHEIGHT; the unknown height is the '?'
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1190
1191 def _format_note(self, fdict):
1192 res = ''
1193 if fdict.get('ext') in ['f4f', 'f4m']:
1194 res += '(unsupported) '
1195 if fdict.get('format_note') is not None:
1196 res += fdict['format_note'] + ' '
1197 if fdict.get('tbr') is not None:
1198 res += '%4dk ' % fdict['tbr']
1199 if fdict.get('container') is not None:
1200 if res:
1201 res += ', '
1202 res += '%s container' % fdict['container']
1203 if (fdict.get('vcodec') is not None and
1204 fdict.get('vcodec') != 'none'):
1205 if res:
1206 res += ', '
1207 res += fdict['vcodec']
1208 if fdict.get('vbr') is not None:
1209 res += '@'
1210 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1211 res += 'video@'
1212 if fdict.get('vbr') is not None:
1213 res += '%4dk' % fdict['vbr']
1214 if fdict.get('fps') is not None:
1215 res += ', %sfps' % fdict['fps']
1216 if fdict.get('acodec') is not None:
1217 if res:
1218 res += ', '
1219 if fdict['acodec'] == 'none':
1220 res += 'video only'
1221 else:
1222 res += '%-5s' % fdict['acodec']
1223 elif fdict.get('abr') is not None:
1224 if res:
1225 res += ', '
1226 res += 'audio'
1227 if fdict.get('abr') is not None:
1228 res += '@%3dk' % fdict['abr']
1229 if fdict.get('asr') is not None:
1230 res += ' (%5dHz)' % fdict['asr']
1231 if fdict.get('filesize') is not None:
1232 if res:
1233 res += ', '
1234 res += format_bytes(fdict['filesize'])
1235 elif fdict.get('filesize_approx') is not None:
1236 if res:
1237 res += ', '
1238 res += '~' + format_bytes(fdict['filesize_approx'])
1239 return res
1240
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    When info_dict has no 'formats' entry, info_dict itself is treated
    as the only format. With more than one format, the first row is
    tagged '(worst)' and the last one '(best)'.
    """
    def render_row(format, idlen=20):
        # The first column is padded to idlen + 1 characters
        template = '%-' + compat_str(idlen + 1) + 's%-10s%-12s%s'
        return template % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    idlen = max(len('format code'), longest_id)

    rows = [render_row(f, idlen) for f in formats]
    if len(formats) > 1:
        rows[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        rows[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_fields = {
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}
    header_line = render_row(header_fields, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
1263
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    string_types = basestring if sys.version_info < (3, 0) else compat_str
    req_is_string = isinstance(req, string_types)
    if req_is_string:
        url = req
    else:
        url = req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            # Rebuild the request around the escaped URL, carrying over
            # its data, headers and origin information
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
1287
def print_debug_header(self):
    """Write the '[debug]' header lines when the 'verbose' param is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD
    of the source checkout (best effort), the Python version and
    platform, the detected external program versions and the proxy map
    of the opener. Does nothing when 'verbose' is not set.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout. Catch Exception rather than everything so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() only exists on Python 2
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1338
def _setup_opener(self):
    """Build the URL opener used for every request and store it.

    Sets self._socket_timeout (600 seconds unless the 'socket_timeout'
    param says otherwise), self.cookiejar (a MozillaCookieJar when a
    'cookiefile' param is given, loaded if the file is readable) and
    self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        # No explicit setting: use the proxies urllib discovers
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty string disables proxies altogether
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    self._opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    self._opener.addheaders = []
1379
def encode(self, s):
    """Encode s with the configured encoding; bytes are returned untouched.

    A UnicodeEncodeError is re-raised with a hint about the system
    encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1389
def get_encoding(self):
    """Return the 'encoding' param, or the preferred encoding if unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured