Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
Merge tag 'upstream/2014.11.21'
[youtubedl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import json
11 import locale
12 import os
13 import platform
14 import re
15 import shutil
16 import subprocess
17 import socket
18 import sys
19 import time
20 import traceback
21
22 if os.name == 'nt':
23 import ctypes
24
25 from .compat import (
26 compat_cookiejar,
27 compat_expanduser,
28 compat_http_client,
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
32 )
33 from .utils import (
34 escape_url,
35 ContentTooShortError,
36 date_from_str,
37 DateRange,
38 DEFAULT_OUTTMPL,
39 determine_ext,
40 DownloadError,
41 encodeFilename,
42 ExtractorError,
43 format_bytes,
44 formatSeconds,
45 get_term_width,
46 locked_file,
47 make_HTTPS_handler,
48 MaxDownloadsReached,
49 PagedList,
50 PostProcessingError,
51 platform_name,
52 preferredencoding,
53 SameFileError,
54 sanitize_filename,
55 subtitles_filename,
56 takewhile_inclusive,
57 UnavailableVideoError,
58 url_basename,
59 write_json_file,
60 write_string,
61 YoutubeDLHandler,
62 prepend_extension,
63 )
64 from .cache import Cache
65 from .extractor import get_info_extractor, gen_extractors
66 from .downloader import get_suitable_downloader
67 from .downloader.rtmp import rtmpdump_version
68 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
69 from .version import __version__
70
71
72 class YoutubeDL(object):
73 """YoutubeDL class.
74
75 YoutubeDL objects are the ones responsible of downloading the
76 actual video file and writing it to disk if the user has requested
77 it, among some other tasks. In most cases there should be one per
78 program. As, given a video URL, the downloader doesn't know how to
79 extract all the needed information, task that InfoExtractors do, it
80 has to pass the URL to one of them.
81
82 For this, YoutubeDL objects have a method that allows
83 InfoExtractors to be registered in a given order. When it is passed
84 a URL, the YoutubeDL object handles it to the first InfoExtractor it
85 finds that reports being able to handle it. The InfoExtractor extracts
86 all the information about the video or videos the URL refers to, and
87 YoutubeDL process the extracted information, possibly using a File
88 Downloader to download the video.
89
90 YoutubeDL objects accept a lot of parameters. In order not to saturate
91 the object constructor with arguments, it receives a dictionary of
92 options instead. These options are available through the params
93 attribute for the InfoExtractors to use. The YoutubeDL also
94 registers itself as the downloader in charge for the InfoExtractors
95 that are added to it, so this is a "mutual registration".
96
97 Available options:
98
99 username: Username for authentication purposes.
100 password: Password for authentication purposes.
101 videopassword: Password for accessing a video.
102 usenetrc: Use netrc for authentication instead.
103 verbose: Print additional info to stdout.
104 quiet: Do not print messages to stdout.
105 no_warnings: Do not print out anything for warnings.
106 forceurl: Force printing final URL.
107 forcetitle: Force printing title.
108 forceid: Force printing ID.
109 forcethumbnail: Force printing thumbnail URL.
110 forcedescription: Force printing description.
111 forcefilename: Force printing final filename.
112 forceduration: Force printing duration.
113 forcejson: Force printing info_dict as JSON.
114 dump_single_json: Force printing the info_dict of the whole playlist
115 (or video) as a single JSON line.
116 simulate: Do not download the video files.
117 format: Video format code.
118 format_limit: Highest quality format to try.
119 outtmpl: Template for output names.
120 restrictfilenames: Do not allow "&" and spaces in file names
121 ignoreerrors: Do not stop on download errors.
122 nooverwrites: Prevent overwriting files.
123 playliststart: Playlist item to start at.
124 playlistend: Playlist item to end at.
125 matchtitle: Download only matching titles.
126 rejecttitle: Reject downloads for matching titles.
127 logger: Log messages to a logging.Logger instance.
128 logtostderr: Log messages to stderr instead of stdout.
129 writedescription: Write the video description to a .description file
130 writeinfojson: Write the video description to a .info.json file
131 writeannotations: Write the video annotations to a .annotations.xml file
132 writethumbnail: Write the thumbnail image to a file
133 writesubtitles: Write the video subtitles to a file
134 writeautomaticsub: Write the automatic subtitles to a file
135 allsubtitles: Downloads all the subtitles of the video
136 (requires writesubtitles or writeautomaticsub)
137 listsubtitles: Lists all available subtitles for the video
138 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
139 subtitleslangs: List of languages of the subtitles to download
140 keepvideo: Keep the video file after post-processing
141 daterange: A DateRange object, download only if the upload_date is in the range.
142 skip_download: Skip the actual download of the video file
143 cachedir: Location of the cache files in the filesystem.
144 False to disable filesystem cache.
145 noplaylist: Download single video instead of a playlist if in doubt.
146 age_limit: An integer representing the user's age in years.
147 Unsuitable videos for the given age are skipped.
148 min_views: An integer representing the minimum view count the video
149 must have in order to not be skipped.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 max_views: An integer representing the maximum view count.
153 Videos that are more popular than that are not
154 downloaded.
155 Videos without view count information are always
156 downloaded. None for no limit.
157 download_archive: File name of a file where all downloads are recorded.
158 Videos already present in the file are not downloaded
159 again.
160 cookiefile: File name where cookies should be read from and dumped to.
161 nocheckcertificate:Do not verify SSL certificates
162 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
163 At the moment, this is only supported by YouTube.
164 proxy: URL of the proxy server to use
165 socket_timeout: Time to wait for unresponsive hosts, in seconds
166 bidi_workaround: Work around buggy terminals without bidirectional text
167 support, using fribidi
168 debug_printtraffic:Print out sent and received HTTP traffic
169 include_ads: Download ads as well
170 default_search: Prepend this string if an input url is not valid.
171 'auto' for elaborate guessing
172 encoding: Use this encoding instead of the system-specified.
173 extract_flat: Do not resolve URLs, return the immediate result.
174 Pass in 'in_playlist' to only show this behavior for
175 playlist items.
176
177 The following parameters are not used by YoutubeDL itself, they are used by
178 the FileDownloader:
179 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
180 noresizebuffer, retries, continuedl, noprogress, consoletitle
181
182 The following options are used by the post processors:
183 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
184 otherwise prefer avconv.
185 exec_cmd: Arbitrary command to run after downloading
186 """
187
188 params = None
189 _ies = []
190 _pps = []
191 _download_retcode = None
192 _num_downloads = None
193 _screen_file = None
194
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    an empty dictionary is used when it is None. When auto_init is true,
    the debug header is printed and the default InfoExtractors are
    registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # A conditional expression (rather than indexing a two-item list with
    # the raw option value) also copes with None or other non-bool values.
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started, fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # ENOENT: neither helper executable could be found
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()
255
def add_info_extractor(self, ie):
    """Append the InfoExtractor ie to the list and index it by its key."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    # Mutual registration: the extractor gets a reference to this object
    ie.set_downloader(self)
261
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. When no instance is
    known yet, a new one is created, added to the extractor list and
    returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
273
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors, in order,
    at the end of the list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
280
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and hand it this object."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
285
def add_progress_hook(self, ph):
    """Register the progress hook ph (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
289
def _bidi_workaround(self, message):
    """Pipe message through the bidi helper process, if one was started.

    Without an output channel the message is returned unchanged.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    # One line is read back for every line that is sent
    line_count = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    pieces = []
    for _ in range(line_count):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    converted = ''.join(pieces)
    # Drop the final character (the newline appended before sending)
    return converted[:-1]
302
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout with the quiet check enabled."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
306
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
309
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file (or to the logger, if one is set).

    When the 'logger' option is set the message goes to logger.debug() and
    nothing else is written. Otherwise the message is written to the screen
    file, followed by a newline unless skip_eol is true. The 'quiet' option
    is only honoured when check_quiet is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        # Conditional expression instead of indexing a list with the flag,
        # so any truthy/falsy value (including None) is accepted.
        terminator = '' if skip_eol else '\n'
        output = message + terminator

        self._write_string(output, self._screen_file)
320
def to_stderr(self, message):
    """Send message to the logger's error(), or write it to the error file."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
330
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title_ptr = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title_ptr)
        return
    if 'TERM' in os.environ:
        escape_sequence = '\033]0;%s\007' % message
        self._write_string(escape_sequence, self._screen_file)
340
def save_console_title(self):
    """Push the current console title on the terminal's title stack."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
347
def restore_console_title(self):
    """Pop the console title from the terminal's title stack."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
354
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
358
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookies.

    The cookie jar is only saved when the 'cookiefile' option is set.
    """
    self.restore_console_title()

    cookiefile = self.params.get('cookiefile')
    if cookiefile is None:
        return
    self.cookiejar.save()
364
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well; tb, if given, is used as that traceback
    text, otherwise one is built from the exception being handled or from
    the current stack.

    Unless the 'ignoreerrors' option is set a DownloadError is raised;
    with it, only the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                tb = ''
                # Include the traceback of a wrapped exception, if any
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*exc_value.exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)
    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return
    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
394
def report_warning(self, message):
    '''
    Emit a warning: through the logger when one is configured, otherwise
    on stderr prefixed with 'WARNING:' (colored if stderr is a tty file).
    Nothing is printed to stderr when the 'no_warnings' option is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
411
def report_error(self, message, tb=None):
    '''
    Call trouble with the message prefixed by 'ERROR:'; the prefix is
    colored in red if stderr is a tty file.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
423
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The file name cannot be shown; fall back to a generic message
        self.to_screen('[download] The file has already been downloaded')
430
def prepare_filename(self, info_dict):
    """Build the output filename from the 'outtmpl' template.

    Returns the filename, or None (after reporting an error) when
    expanding the template raises ValueError.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        fields['autonumber'] = ('%0' + str(autonumber_size) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Zero-pad the index to the number of digits of n_entries
            pad = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad, fields['playlist_index'])
        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')
        # Fields missing from the template dictionary expand to 'NA'
        sanitized = collections.defaultdict(lambda: 'NA')
        for key, value in fields.items():
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == 'id'))

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        return compat_expanduser(outtmpl) % sanitized
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
468
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise a string explaining why the entry is skipped is returned.
    The entry is checked against the matchtitle/rejecttitle patterns, the
    date range, the view count limits, the age limit and the download
    archive.
    """

    # Name used in messages; the entry may not have a title yet
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # The title can be missing when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        actual_age_limit = info_dict.get('age_limit')
        if actual_age_limit is None:
            actual_age_limit = 0
        if age_limit < actual_age_limit:
            # video_title, not title: title is unbound for entries that
            # have no 'title' key
            return 'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
507
508 @staticmethod
509 def add_extra_info(info_dict, extra_info):
510 '''Set the keys from extra_info in info dict if they are missing'''
511 for key, value in extra_info.items():
512 info_dict.setdefault(key, value)
513
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If ie_key is given only that extractor is tried, otherwise all the
    registered ones are tried in order.
    If 'process', the result is passed through process_ie_result (which
    also downloads the videos if 'download') and its return value is
    returned; otherwise the raw extractor result is returned.
    extra_info is a dict containing the extra values to add to each result
    None is returned when the extractor yields no result, when an error
    was reported without raising, or when no extractor is suitable.
    '''
    # Avoid a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop finished without finding any suitable extractor
        self.report_error('no suitable InfoExtractor for URL %s' % url)
563
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields missing from ie_result."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
571
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict with extra values to add to the results.
    Returns the resolved ie_result.
    Raises an Exception for an unknown '_type'.
    """
    # Avoid a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: return the unresolved reference as it is
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # The listed fields always come from the embedded result,
            # everything else is kept from the embedding page
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
701
def select_format(self, format_spec, available_formats):
    """Pick the format described by format_spec from available_formats.

    available_formats is indexed from worst (first) to best (last).
    format_spec is 'best' (or None), 'worst', one of the
    best/worst audio/video specs, a file extension or a format id.
    Returns the chosen format dictionary, or None when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # spec -> (codec field that must be 'none', index of the pick)
    codec_specs = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_specs:
        codec_field, pick = codec_specs[format_spec]
        candidates = [
            f for f in available_formats
            if f.get(codec_field) == 'none']
    else:
        pick = -1
        if format_spec in ('mp4', 'flv', 'webm', '3gp', 'm4a'):
            candidates = [
                f for f in available_formats
                if f['ext'] == format_spec]
        else:
            candidates = [
                f for f in available_formats
                if f['format_id'] == format_spec]

    if candidates:
        return candidates[pick]
    return None
741
def process_video_result(self, info_dict, download=True):
    """Complete a video result, select its formats and process them.

    Missing fields (playlist, thumbnail, display_id, upload_date and the
    per-format format_id/format/ext) are filled in, the formats requested
    by the 'format' option are selected and, if 'download', passed to
    process_info.

    Returns info_dict updated with the last selected format. None is
    returned after listing the formats ('listformats' option) or after
    reporting a wrongly ordered video+audio request.
    Raises ExtractorError when a mandatory field is missing, when there
    are no formats or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        def thumbnail_key(t):
            # Unknown dimensions sort first. None cannot be compared with
            # integers on Python 3, so it is replaced by -1 (and '').
            width = t.get('width')
            height = t.get('height')
            return (
                width if width is not None else -1,
                height if height is not None else -1,
                t.get('url') or '')
        thumbnails.sort(key=thumbnail_key)
        for t in thumbnails:
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
877
def process_info(self, info_dict):
    """Process a single resolved IE result.

    info_dict must describe a single video (its '_type', when present,
    has to be 'video').  The method fills in the derived fields
    'fulltitle', 'stitle' and 'format', prints whatever the force*
    params request, writes the optional side files (description,
    annotations, subtitles, info JSON, thumbnail), downloads the media
    unless 'skip_download' is set, runs the postprocessors and finally
    records the video in the download archive.

    Returns None.  Processing stops early when _match_entry rejects the
    entry, in simulate mode, when prepare_filename yields None, or after
    an error has been reported through report_error.

    Raises MaxDownloadsReached when the 'max_downloads' limit has
    already been hit, and UnavailableVideoError when the downloader
    fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around before shortening it
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    # Make sure the target directory exists before writing anything
    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a non-text value: nothing usable to write
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    # Go through encodeFilename like every other file
                    # access in this method, so the path written matches
                    # the path checked by the nooverwrites test above
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal for the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # Existing file counts as a successful download
            success = True
        else:
            try:
                def dl(name, info):
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Several formats were requested: download each one to
                    # its own file and let the merger combine them later
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
1085
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL is given while the
    output template contains no '%' field and 'max_downloads' is not 1.
    MaxDownloadsReached is announced on screen and re-raised.
    Returns self._download_retcode.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        template_is_fixed = '%' not in outtmpl
        if template_is_fixed and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1108
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename.

    When processing fails with DownloadError and the stored info has a
    'webpage_url', a warning is reported and that URL is downloaded
    instead; without one the error is re-raised.
    Returns the return code of whichever path was taken.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1122
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is made of the postprocessors listed under
    ie_info['__postprocessors'] (if any) followed by self._pps.  Each
    one receives the info dict returned by the previous one, starting
    from a copy of ie_info with 'filepath' set to filename.

    A PostProcessingError raised by a postprocessor is reported and the
    chain continues with the next one.  When the postprocessors ask for
    the original file to be dropped and 'keepvideo' is not set, filename
    is deleted afterwards.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        # Hand the (possibly updated) info on to the next postprocessor;
        # previously the returned dict was silently discarded
        if new_info is not None:
            info = new_info
        if keep_video_wish is not None:
            if keep_video_wish:
                # An explicit wish to keep the file always wins
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1149
1150 def _make_archive_id(self, info_dict):
1151 # Future-proof against any change in case
1152 # and backwards compatibility with prior versions
1153 extractor = info_dict.get('extractor_key')
1154 if extractor is None:
1155 if 'id' in info_dict:
1156 extractor = info_dict.get('ie_key') # key in a playlist
1157 if extractor is None:
1158 return None # Incomplete video information
1159 return extractor.lower() + ' ' + info_dict['id']
1160
def in_download_archive(self, info_dict):
    """Tell whether the video is already listed in the download archive.

    Returns False when no 'download_archive' is configured, when no
    archive id can be built for info_dict, or when the archive file
    does not exist yet.  Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
1179
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
1188
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human readable resolution string for a format dict.

    Yields 'audio only' when 'vcodec' is 'none', otherwise the explicit
    'resolution' if present, otherwise a string built from 'width' and
    'height' ('WxH', 'Hp' or 'Wx?'), and default when nothing is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Only the width is known: it goes first, the height is the
        # unknown part (the previous '?x%d' had them swapped)
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1205
1206 def _format_note(self, fdict):
1207 res = ''
1208 if fdict.get('ext') in ['f4f', 'f4m']:
1209 res += '(unsupported) '
1210 if fdict.get('format_note') is not None:
1211 res += fdict['format_note'] + ' '
1212 if fdict.get('tbr') is not None:
1213 res += '%4dk ' % fdict['tbr']
1214 if fdict.get('container') is not None:
1215 if res:
1216 res += ', '
1217 res += '%s container' % fdict['container']
1218 if (fdict.get('vcodec') is not None and
1219 fdict.get('vcodec') != 'none'):
1220 if res:
1221 res += ', '
1222 res += fdict['vcodec']
1223 if fdict.get('vbr') is not None:
1224 res += '@'
1225 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1226 res += 'video@'
1227 if fdict.get('vbr') is not None:
1228 res += '%4dk' % fdict['vbr']
1229 if fdict.get('fps') is not None:
1230 res += ', %sfps' % fdict['fps']
1231 if fdict.get('acodec') is not None:
1232 if res:
1233 res += ', '
1234 if fdict['acodec'] == 'none':
1235 res += 'video only'
1236 else:
1237 res += '%-5s' % fdict['acodec']
1238 elif fdict.get('abr') is not None:
1239 if res:
1240 res += ', '
1241 res += 'audio'
1242 if fdict.get('abr') is not None:
1243 res += '@%3dk' % fdict['abr']
1244 if fdict.get('asr') is not None:
1245 res += ' (%5dHz)' % fdict['asr']
1246 if fdict.get('filesize') is not None:
1247 if res:
1248 res += ', '
1249 res += format_bytes(fdict['filesize'])
1250 elif fdict.get('filesize_approx') is not None:
1251 if res:
1252 res += ', '
1253 res += '~' + format_bytes(fdict['filesize_approx'])
1254 return res
1255
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'] when present, otherwise info_dict itself
    is treated as the single format.  With more than one format the
    first row is tagged '(worst)' and the last one '(best)'.
    """
    def render_row(fmt, id_width=20):
        template = '%-' + compat_str(id_width + 1) + 's%-10s%-12s%s'
        return template % (
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )

    formats = info_dict.get('formats', [info_dict])
    # The id column must fit both the header text and the longest id
    longest_id = max(len(f['format_id']) for f in formats)
    id_width = max(len('format code'), longest_id)

    rows = [render_row(f, id_width) for f in formats]
    if len(formats) > 1:
        for index, tag in ((0, '(worst)'), (-1, '(best)')):
            padding = ' ' if self._format_note(formats[index]) else ''
            rows[index] += padding + tag

    header_line = render_row({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, id_width=id_width)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
1278
def urlopen(self, req):
    """ Start an HTTP download

    req is either a URL string or a request object.  Its URL is passed
    through escape_url first and req is replaced only when that changes
    the URL.  Returns whatever self._opener.open returns.
    """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    string_type = basestring if sys.version_info < (3, 0) else compat_str
    if isinstance(req, string_type):
        escaped = escape_url(req)
        if req != escaped:
            req = escaped
    else:
        original_url = req.get_full_url()
        escaped = escape_url(original_url)
        if original_url != escaped:
            # Rebuild the request around the escaped URL
            req = compat_urllib_request.Request(
                escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
1302
def print_debug_header(self):
    """Write the '[debug]' environment header when 'verbose' is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD
    (when it can be determined), the Python version and platform, the
    versions of the external executables and the proxy map.
    Returns None; does nothing unless the 'verbose' param is truthy.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out = sp.communicate()[0]
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  Catch Exception rather than everything so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear only exists on Python 2
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1356
def _setup_opener(self):
    """Create self._opener along with self.cookiejar and self._socket_timeout.

    The socket timeout defaults to 600 when 'socket_timeout' is unset.
    Cookies come from 'cookiefile' when given, proxies from 'proxy'
    (an empty string disables them) or else from getproxies().
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    # Cookies
    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    # Proxies
    proxy_opt = self.params.get('proxy')
    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    self._opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    self._opener.addheaders = []
1397
def encode(self, s):
    """Return s as bytes, encoding text with self.get_encoding().

    Bytes are passed through untouched.  A UnicodeEncodeError is
    re-raised with a hint about the --encoding option added to its
    reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded at this point
1407
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured