debian/control: Drop really ancient Recommends to have ffprobe.

[youtubedl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 9605f8f99ac04bf72032b1cc8a64635bb6c2a8b8..00af78e0600f8d2136116e91bcda179f70dbf9a5 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,8 +4,10 @@
  from __future__ import absolute_import, unicode_literals
  
  import collections
+import contextlib
  import datetime
  import errno
+import fileinput
  import io
  import itertools
  import json
@@ -28,6 +30,7 @@ from .compat import (
      compat_basestring,
      compat_cookiejar,
      compat_expanduser,
+    compat_get_terminal_size,
      compat_http_client,
      compat_kwargs,
      compat_str,
@@ -46,21 +49,22 @@ from .utils import (
      ExtractorError,
      format_bytes,
      formatSeconds,
-    get_term_width,
+    HEADRequest,
      locked_file,
      make_HTTPS_handler,
      MaxDownloadsReached,
      PagedList,
      parse_filesize,
+    PerRequestProxyHandler,
      PostProcessingError,
      platform_name,
      preferredencoding,
      render_table,
      SameFileError,
      sanitize_filename,
+    sanitize_path,
      std_headers,
      subtitles_filename,
-    takewhile_inclusive,
      UnavailableVideoError,
      url_basename,
      version_tuple,
@@ -68,6 +72,7 @@ from .utils import (
      write_string,
      YoutubeDLHandler,
      prepend_extension,
+    replace_extension,
      args_to_str,
      age_restricted,
  )
@@ -114,7 +119,7 @@ class YoutubeDL(object):
  
      username:          Username for authentication purposes.
      password:          Password for authentication purposes.
-    videopassword:     Password for acces a video.
+    videopassword:     Password for accessing a video.
      usenetrc:          Use netrc for authentication instead.
      verbose:           Print additional info to stdout.
      quiet:             Do not print messages to stdout.
@@ -131,10 +136,10 @@ class YoutubeDL(object):
                         (or video) as a single JSON line.
      simulate:          Do not download the video files.
      format:            Video format code. See options.py for more information.
-    format_limit:      Highest quality format to try.
      outtmpl:           Template for output names.
      restrictfilenames: Do not allow "&" and spaces in file names
      ignoreerrors:      Do not stop on download errors.
+    force_generic_extractor: Force downloader to use the generic extractor
      nooverwrites:      Prevent overwriting files.
      playliststart:     Playlist item to start at.
      playlistend:       Playlist item to end at.
@@ -154,7 +159,7 @@ class YoutubeDL(object):
      allsubtitles:      Downloads all the subtitles of the video
                         (requires writesubtitles or writeautomaticsub)
      listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitlesformat:   The format code for subtitles
      subtitleslangs:    List of languages of the subtitles to download
      keepvideo:         Keep the video file after post-processing
      daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -181,6 +186,8 @@ class YoutubeDL(object):
      prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                         At the moment, this is only supported by YouTube.
      proxy:             URL of the proxy server to use
+    cn_verification_proxy:  URL of the proxy to use for IP address verification
+                       on Chinese sites. (Experimental)
      socket_timeout:    Time to wait for unresponsive hosts, in seconds
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fridibi
@@ -199,18 +206,25 @@ class YoutubeDL(object):
                         postprocessor.
      progress_hooks:    A list of functions that get called on download
                         progress, with a dictionary with the entries
-                       * status: One of "downloading" and "finished".
+                       * status: One of "downloading", "error", or "finished".
                                   Check this first and ignore unknown values.
  
-                       If status is one of "downloading" or "finished", the
+                       If status is one of "downloading" or "finished", the
                         following properties may also be present:
                         * filename: The final filename (always present)
+                       * tmpfilename: The filename we're currently writing to
                         * downloaded_bytes: Bytes on disk
                         * total_bytes: Size of the whole file, None if unknown
-                       * tmpfilename: The filename we're currently writing to
+                       * total_bytes_estimate: Guess of the eventual file size,
+                                               None if unavailable.
+                       * elapsed: The number of seconds since download started.
                         * eta: The estimated time in seconds, None if unknown
                         * speed: The download speed in bytes/second, None if
                                  unknown
+                       * fragment_index: The counter of the currently
+                                         downloaded video fragment.
+                       * fragment_count: The number of fragments (= individual
+                                         files that will be merged)
  
                         Progress hooks are guaranteed to be called at least once
                         (with status "finished") if the download is successful.
@@ -225,21 +239,31 @@ class YoutubeDL(object):
      call_home:         Boolean, true iff we are allowed to contact the
                         youtube-dl servers for debugging.
      sleep_interval:    Number of seconds to sleep before each download.
-    external_downloader:  Executable of the external downloader to call.
      listformats:       Print an overview of available video formats and exit.
      list_thumbnails:   Print a table of all thumbnails and exit.
-
+    match_filter:      A function that gets called with the info_dict of
+                       every video.
+                       If it returns a message, the video is ignored.
+                       If it returns None, the video is downloaded.
+                       match_filter_func in utils.py is one example for this.
+    no_color:          Do not emit color codes in output.
+
+    The following options determine which downloader is picked:
+    external_downloader: Executable of the external downloader to call.
+                       None or unset for standard (built-in) downloader.
+    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
  
      The following parameters are not used by YoutubeDL itself, they are used by
-    the FileDownloader:
+    the downloader (see youtube_dl/downloader/common.py):
      nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
      noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize.
+    xattr_set_filesize, external_downloader_args.
  
      The following options are used by the post processors:
      prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                         otherwise prefer avconv.
-    exec_cmd:          Arbitrary command to run after downloading
+    postprocessor_args: A list of additional command-line arguments for the
+                        postprocessor.
      """
  
      params = None
@@ -268,7 +292,7 @@ class YoutubeDL(object):
              try:
                  import pty
                  master, slave = pty.openpty()
-                width = get_term_width()
+                width = compat_get_terminal_size().columns
                  if width is None:
                      width_args = []
                  else:
@@ -292,8 +316,8 @@ class YoutubeDL(object):
                      raise
  
          if (sys.version_info >= (3,) and sys.platform != 'win32' and
-                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
-                and not params.get('restrictfilenames', False)):
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+                not params.get('restrictfilenames', False)):
              # On Python 3, the Unicode filesystem API will throw errors (#1474)
              self.report_warning(
                  'Assuming --restrict-filenames since file system encoding '
@@ -301,8 +325,10 @@ class YoutubeDL(object):
                  'Set the LC_ALL environment variable to fix this.')
              self.params['restrictfilenames'] = True
  
-        if '%(stitle)s' in self.params.get('outtmpl', ''):
-            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+        if isinstance(params.get('outtmpl'), bytes):
+            self.report_warning(
+                'Parameter outtmpl is bytes, but should be a unicode string. '
+                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
  
          self._setup_opener()
  
@@ -485,7 +511,7 @@ class YoutubeDL(object):
          else:
              if self.params.get('no_warnings'):
                  return
-            if self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                  _msg_header = '\033[0;33mWARNING:\033[0m'
              else:
                  _msg_header = 'WARNING:'
@@ -497,7 +523,7 @@ class YoutubeDL(object):
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
          in red if stderr is a tty file.
          '''
-        if self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
              _msg_header = '\033[0;31mERROR:\033[0m'
          else:
              _msg_header = 'ERROR:'
@@ -541,7 +567,7 @@ class YoutubeDL(object):
                                   if v is not None)
              template_dict = collections.defaultdict(lambda: 'NA', template_dict)
  
-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
              tmpl = compat_expanduser(outtmpl)
              filename = tmpl % template_dict
              # Temporary fix for #4787
@@ -554,7 +580,7 @@ class YoutubeDL(object):
              self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
              return None
  
-    def _match_entry(self, info_dict):
+    def _match_entry(self, info_dict, incomplete):
          """ Returns None iff the file should be downloaded """
  
          video_title = info_dict.get('title', info_dict.get('id', 'video'))
@@ -583,9 +609,17 @@ class YoutubeDL(object):
              if max_views is not None and view_count > max_views:
                  return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
          if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-            return 'Skipping "%s" because it is age restricted' % title
+            return 'Skipping "%s" because it is age restricted' % video_title
          if self.in_download_archive(info_dict):
              return '%s has already been recorded in archive' % video_title
+
+        if not incomplete:
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                ret = match_filter(info_dict)
+                if ret is not None:
+                    return ret
+
          return None
  
      @staticmethod
@@ -595,12 +629,15 @@ class YoutubeDL(object):
              info_dict.setdefault(key, value)
  
      def extract_info(self, url, download=True, ie_key=None, extra_info={},
-                     process=True):
+                     process=True, force_generic_extractor=False):
          '''
          Returns a list with a dictionary for each video we find.
          If 'download', also downloads the videos.
          extra_info is a dict containing the extra values to add to each result
-         '''
+        '''
+
+        if not ie_key and force_generic_extractor:
+            ie_key = 'Generic'
  
          if ie_key:
              ies = [self.get_info_extractor(ie_key)]
@@ -729,7 +766,9 @@ class YoutubeDL(object):
              if isinstance(ie_entries, list):
                  n_all_entries = len(ie_entries)
                  if playlistitems:
-                    entries = [ie_entries[i - 1] for i in playlistitems]
+                    entries = [
+                        ie_entries[i - 1] for i in playlistitems
+                        if -n_all_entries <= i - 1 < n_all_entries]
                  else:
                      entries = ie_entries[playliststart:playlistend]
                  n_entries = len(entries)
@@ -779,7 +818,7 @@ class YoutubeDL(object):
                      'extractor_key': ie_result['extractor_key'],
                  }
  
-                reason = self._match_entry(entry)
+                reason = self._match_entry(entry, incomplete=True)
                  if reason is not None:
                      self.to_screen('[download] ' + reason)
                      continue
@@ -826,26 +865,43 @@ class YoutubeDL(object):
              '!=': operator.ne,
          }
          operator_rex = re.compile(r'''(?x)\s*\[
-            (?P<key>width|height|tbr|abr|vbr|filesize|fps)
+            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
              \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
              (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
              \]$
              ''' % '|'.join(map(re.escape, OPERATORS.keys())))
          m = operator_rex.search(format_spec)
+        if m:
+            try:
+                comparison_value = int(m.group('value'))
+            except ValueError:
+                comparison_value = parse_filesize(m.group('value'))
+                if comparison_value is None:
+                    comparison_value = parse_filesize(m.group('value') + 'B')
+                if comparison_value is None:
+                    raise ValueError(
+                        'Invalid value %r in format specification %r' % (
+                            m.group('value'), format_spec))
+            op = OPERATORS[m.group('op')]
+
          if not m:
-            raise ValueError('Invalid format specification %r' % format_spec)
+            STR_OPERATORS = {
+                '=': operator.eq,
+                '!=': operator.ne,
+            }
+            str_operator_rex = re.compile(r'''(?x)\s*\[
+                \s*(?P<key>ext|acodec|vcodec|container|protocol)
+                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+                \s*(?P<value>[a-zA-Z0-9_-]+)
+                \s*\]$
+                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+            m = str_operator_rex.search(format_spec)
+            if m:
+                comparison_value = m.group('value')
+                op = STR_OPERATORS[m.group('op')]
  
-        try:
-            comparison_value = int(m.group('value'))
-        except ValueError:
-            comparison_value = parse_filesize(m.group('value'))
-            if comparison_value is None:
-                comparison_value = parse_filesize(m.group('value') + 'B')
-            if comparison_value is None:
-                raise ValueError(
-                    'Invalid value %r in format specification %r' % (
-                        m.group('value'), format_spec))
-        op = OPERATORS[m.group('op')]
+        if not m:
+            raise ValueError('Invalid format specification %r' % format_spec)
  
          def _filter(f):
              actual_value = f.get(m.group('key'))
@@ -867,10 +923,17 @@ class YoutubeDL(object):
          if not available_formats:
              return None
  
-        if format_spec == 'best' or format_spec is None:
-            return available_formats[-1]
-        elif format_spec == 'worst':
-            return available_formats[0]
+        if format_spec in ['best', 'worst', None]:
+            format_idx = 0 if format_spec == 'worst' else -1
+            audiovideo_formats = [
+                f for f in available_formats
+                if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+            if audiovideo_formats:
+                return audiovideo_formats[format_idx]
+            # for audio only (soundcloud) or video only (imgur) urls, select the best/worst format
+            elif (all(f.get('acodec') != 'none' for f in available_formats) or
+                  all(f.get('vcodec') != 'none' for f in available_formats)):
+                return available_formats[format_idx]
          elif format_spec == 'bestaudio':
              audio_formats = [
                  f for f in available_formats
@@ -920,27 +983,9 @@ class YoutubeDL(object):
          return res
  
      def _calc_cookies(self, info_dict):
-        class _PseudoRequest(object):
-            def __init__(self, url):
-                self.url = url
-                self.headers = {}
-                self.unverifiable = False
-
-            def add_unredirected_header(self, k, v):
-                self.headers[k] = v
-
-            def get_full_url(self):
-                return self.url
-
-            def is_unverifiable(self):
-                return self.unverifiable
-
-            def has_header(self, h):
-                return h in self.headers
-
-        pr = _PseudoRequest(info_dict['url'])
+        pr = compat_urllib_request.Request(info_dict['url'])
          self.cookiejar.add_cookie_header(pr)
-        return pr.headers.get('Cookie')
+        return pr.get_header('Cookie')
  
      def process_video_result(self, info_dict, download=True):
          assert info_dict.get('_type', 'video') == 'video'
@@ -965,7 +1010,7 @@ class YoutubeDL(object):
                  t.get('preference'), t.get('width'), t.get('height'),
                  t.get('id'), t.get('url')))
              for i, t in enumerate(thumbnails):
-                if 'width' in t and 'height' in t:
+                if t.get('width') and t.get('height'):
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
                  if t.get('id') is None:
                      t['id'] = '%d' % i
@@ -977,19 +1022,22 @@ class YoutubeDL(object):
              info_dict['display_id'] = info_dict['id']
  
          if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around negative timestamps in Windows
-            # (see http://bugs.python.org/issue1646728)
-            if info_dict['timestamp'] < 0 and os.name == 'nt':
-                info_dict['timestamp'] = 0
-            upload_date = datetime.datetime.utcfromtimestamp(
-                info_dict['timestamp'])
-            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-
-        # This extractors handle format selection themselves
-        if info_dict['extractor'] in ['Youku']:
-            if download:
-                self.process_info(info_dict)
-            return info_dict
+            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+            # see http://bugs.python.org/issue1646728)
+            try:
+                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+            except (ValueError, OverflowError, OSError):
+                pass
+
+        if self.params.get('listsubtitles', False):
+            if 'automatic_captions' in info_dict:
+                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+            return
+        info_dict['requested_subtitles'] = self.process_subtitles(
+            info_dict['id'], info_dict.get('subtitles'),
+            info_dict.get('automatic_captions'))
  
          # We now pick which formats have to be downloaded
          if info_dict.get('formats') is None:
@@ -1001,6 +1049,8 @@ class YoutubeDL(object):
          if not formats:
              raise ExtractorError('No video formats found!')
  
+        formats_dict = {}
+
          # We check that all the formats have the format and format_id fields
          for i, format in enumerate(formats):
              if 'url' not in format:
@@ -1008,6 +1058,18 @@ class YoutubeDL(object):
  
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
+            format_id = format['format_id']
+            if format_id not in formats_dict:
+                formats_dict[format_id] = []
+            formats_dict[format_id].append(format)
+
+        # Make sure all formats have unique format_id
+        for format_id, ambiguous_formats in formats_dict.items():
+            if len(ambiguous_formats) > 1:
+                for i, format in enumerate(ambiguous_formats):
+                    format['format_id'] = '%s-%d' % (format_id, i)
+
+        for i, format in enumerate(formats):
              if format.get('format') is None:
                  format['format'] = '{id} - {res}{note}'.format(
                      id=format['format_id'],
@@ -1023,12 +1085,6 @@ class YoutubeDL(object):
              full_format_info.update(format)
              format['http_headers'] = self._calc_headers(full_format_info)
  
-        format_limit = self.params.get('format_limit', None)
-        if format_limit:
-            formats = list(takewhile_inclusive(
-                lambda f: f['format_id'] != format_limit, formats
-            ))
-
          # TODO Central sorting goes here
  
          if formats[0] is not info_dict:
@@ -1046,10 +1102,16 @@ class YoutubeDL(object):
  
          req_format = self.params.get('format')
          if req_format is None:
-            req_format = 'best'
+            req_format_list = []
+            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
+                    info_dict['extractor'] in ['youtube', 'ted']):
+                merger = FFmpegMergerPP(self)
+                if merger.available and merger.can_merge():
+                    req_format_list.append('bestvideo+bestaudio')
+            req_format_list.append('best')
+            req_format = '/'.join(req_format_list)
          formats_to_download = []
-        # The -1 is for supporting YoutubeIE
-        if req_format in ('-1', 'all'):
+        if req_format == 'all':
              formats_to_download = formats
          else:
              for rfstr in req_format.split(','):
@@ -1113,6 +1175,55 @@ class YoutubeDL(object):
          info_dict.update(formats_to_download[-1])
          return info_dict
  
+    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+        """Select the requested subtitles and their format"""
+        available_subs = {}
+        if normal_subtitles and self.params.get('writesubtitles'):
+            available_subs.update(normal_subtitles)
+        if automatic_captions and self.params.get('writeautomaticsub'):
+            for lang, cap_info in automatic_captions.items():
+                if lang not in available_subs:
+                    available_subs[lang] = cap_info
+
+        if (not self.params.get('writesubtitles') and not
+                self.params.get('writeautomaticsub') or not
+                available_subs):
+            return None
+
+        if self.params.get('allsubtitles', False):
+            requested_langs = available_subs.keys()
+        else:
+            if self.params.get('subtitleslangs', False):
+                requested_langs = self.params.get('subtitleslangs')
+            elif 'en' in available_subs:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs.keys())[0]]
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
      def process_info(self, info_dict):
          """Process a single resolved IE result."""
  
@@ -1127,13 +1238,10 @@ class YoutubeDL(object):
          if len(info_dict['title']) > 200:
              info_dict['title'] = info_dict['title'][:197] + '...'
  
-        # Keep for backwards compatibility
-        info_dict['stitle'] = info_dict['title']
-
          if 'format' not in info_dict:
              info_dict['format'] = info_dict['ext']
  
-        reason = self._match_entry(info_dict)
+        reason = self._match_entry(info_dict, incomplete=False)
          if reason is not None:
              self.to_screen('[download] ' + reason)
              return
@@ -1175,7 +1283,7 @@ class YoutubeDL(object):
              return
  
          try:
-            dn = os.path.dirname(encodeFilename(filename))
+            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
              if dn and not os.path.exists(dn):
                  os.makedirs(dn)
          except (OSError, IOError) as err:
@@ -1183,7 +1291,7 @@ class YoutubeDL(object):
              return
  
          if self.params.get('writedescription', False):
-            descfn = filename + '.description'
+            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                  self.to_screen('[info] Video description is already present')
              elif info_dict.get('description') is None:
@@ -1198,7 +1306,7 @@ class YoutubeDL(object):
                      return
  
          if self.params.get('writeannotations', False):
-            annofn = filename + '.annotations.xml'
+            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                  self.to_screen('[info] Video annotations are already present')
              else:
@@ -1215,15 +1323,23 @@ class YoutubeDL(object):
          subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                         self.params.get('writeautomaticsub')])
  
-        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+        if subtitles_are_requested and info_dict.get('requested_subtitles'):
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat', 'srt')
-            for sub_lang in subtitles.keys():
-                sub = subtitles[sub_lang]
-                if sub is None:
-                    continue
+            subtitles = info_dict['requested_subtitles']
+            ie = self.get_info_extractor(info_dict['extractor_key'])
+            for sub_lang, sub_info in subtitles.items():
+                sub_format = sub_info['ext']
+                if sub_info.get('data') is not None:
+                    sub_data = sub_info['data']
+                else:
+                    try:
+                        sub_data = ie._download_webpage(
+                            sub_info['url'], info_dict['id'], note=False)
+                    except ExtractorError as err:
+                        self.report_warning('Unable to download subtitle for "%s": %s' %
+                                            (sub_lang, compat_str(err.cause)))
+                        continue
                  try:
                      sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                      if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@@ -1231,19 +1347,19 @@ class YoutubeDL(object):
                      else:
                          self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                          with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+                            subfile.write(sub_data)
                  except (OSError, IOError):
                      self.report_error('Cannot write subtitles file ' + sub_filename)
                      return
  
          if self.params.get('writeinfojson', False):
-            infofn = os.path.splitext(filename)[0] + '.info.json'
+            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                  self.to_screen('[info] Video description metadata is already present')
              else:
                  self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                  try:
-                    write_json_file(info_dict, infofn)
+                    write_json_file(self.filter_requested_info(info_dict), infofn)
                  except (OSError, IOError):
                      self.report_error('Cannot write metadata to JSON file ' + infofn)
                      return
@@ -1263,24 +1379,57 @@ class YoutubeDL(object):
                  if info_dict.get('requested_formats') is not None:
                      downloaded = []
                      success = True
-                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
-                    if not merger._executable:
+                    merger = FFmpegMergerPP(self)
+                    if not merger.available:
                          postprocessors = []
                          self.report_warning('You have requested multiple '
                                              'formats but ffmpeg or avconv are not installed.'
-                                            ' The formats won\'t be merged')
+                                            ' The formats won\'t be merged.')
                      else:
                          postprocessors = [merger]
-                    for f in info_dict['requested_formats']:
-                        new_info = dict(info_dict)
-                        new_info.update(f)
-                        fname = self.prepare_filename(new_info)
-                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
-                        downloaded.append(fname)
-                        partial_success = dl(fname, new_info)
-                        success = success and partial_success
-                    info_dict['__postprocessors'] = postprocessors
-                    info_dict['__files_to_merge'] = downloaded
+
+                    def compatible_formats(formats):
+                        """Tell whether a (video, audio) format pair can be merged
+                        without switching to another container.
+
+                        formats must unpack into exactly two format dicts.
+                        Returns True only when both dicts carry an 'ext' and
+                        both extensions belong to the same compatibility group.
+                        """
+                        video, audio = formats
+                        # Check extension
+                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
+                        if video_ext and audio_ext:
+                            COMPATIBLE_EXTS = (
+                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+                                # The trailing comma matters: ('webm') is just the
+                                # string 'webm', which would turn the `in` checks
+                                # below into substring tests ('web', 'm', ...).
+                                ('webm',),
+                            )
+                            for exts in COMPATIBLE_EXTS:
+                                if video_ext in exts and audio_ext in exts:
+                                    return True
+                        # TODO: Check acodec/vcodec
+                        return False
+
+                    filename_real_ext = os.path.splitext(filename)[1][1:]
+                    filename_wo_ext = (
+                        os.path.splitext(filename)[0]
+                        if filename_real_ext == info_dict['ext']
+                        else filename)
+                    requested_formats = info_dict['requested_formats']
+                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+                        info_dict['ext'] = 'mkv'
+                        self.report_warning(
+                            'Requested formats are incompatible for merge and will be merged into mkv.')
+                    # Ensure filename always has a correct extension for successful merge
+                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+                    if os.path.exists(encodeFilename(filename)):
+                        self.to_screen(
+                            '[download] %s has already been downloaded and '
+                            'merged' % filename)
+                    else:
+                        for f in requested_formats:
+                            new_info = dict(info_dict)
+                            new_info.update(f)
+                            fname = self.prepare_filename(new_info)
+                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+                            downloaded.append(fname)
+                            partial_success = dl(fname, new_info)
+                            success = success and partial_success
+                        info_dict['__postprocessors'] = postprocessors
+                        info_dict['__files_to_merge'] = downloaded
                  else:
                      # Just a single file
                      success = dl(filename, info_dict)
@@ -1343,14 +1492,15 @@ class YoutubeDL(object):
          """Download a given list of URLs."""
          outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
          if (len(url_list) > 1 and
-                '%' not in outtmpl
-                and self.params.get('max_downloads') != 1):
+                '%' not in outtmpl and
+                self.params.get('max_downloads') != 1):
              raise SameFileError(outtmpl)
  
          for url in url_list:
              try:
                  # It also downloads the videos
-                res = self.extract_info(url)
+                res = self.extract_info(
+                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
              except UnavailableVideoError:
                  self.report_error('unable to download video')
              except MaxDownloadsReached:
@@ -1363,8 +1513,11 @@ class YoutubeDL(object):
          return self._download_retcode
  
      def download_with_info_file(self, info_filename):
-        with io.open(info_filename, 'r', encoding='utf-8') as f:
-            info = json.load(f)
+        with contextlib.closing(fileinput.FileInput(
+                [info_filename], mode='r',
+                openhook=fileinput.hook_encoded('utf-8'))) as f:
+            # FileInput doesn't have a read method, we can't call json.load
+            info = self.filter_requested_info(json.loads('\n'.join(f)))
          try:
              self.process_ie_result(info, download=True)
          except DownloadError:
@@ -1376,6 +1529,12 @@ class YoutubeDL(object):
                  raise
          return self._download_retcode
  
+    @staticmethod
+    def filter_requested_info(info_dict):
+        """Return a shallow copy of info_dict without the
+        'requested_formats' and 'requested_subtitles' entries."""
+        dropped_keys = ['requested_formats', 'requested_subtitles']
+        filtered = {}
+        for key, value in info_dict.items():
+            if key in dropped_keys:
+                continue
+            filtered[key] = value
+        return filtered
+
      def post_process(self, filename, ie_info):
          """Run all the postprocessors on the given file."""
          info = dict(ie_info)
@@ -1385,24 +1544,18 @@ class YoutubeDL(object):
              pps_chain.extend(ie_info['__postprocessors'])
          pps_chain.extend(self._pps)
          for pp in pps_chain:
-            keep_video = None
-            old_filename = info['filepath']
+            files_to_delete = []
              try:
-                keep_video_wish, info = pp.run(info)
-                if keep_video_wish is not None:
-                    if keep_video_wish:
-                        keep_video = keep_video_wish
-                    elif keep_video is None:
-                        # No clear decision yet, let IE decide
-                        keep_video = keep_video_wish
+                files_to_delete, info = pp.run(info)
              except PostProcessingError as e:
                  self.report_error(e.msg)
-            if keep_video is False and not self.params.get('keepvideo', False):
-                try:
+            if files_to_delete and not self.params.get('keepvideo', False):
+                for old_filename in files_to_delete:
                      self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
-                    os.remove(encodeFilename(old_filename))
-                except (IOError, OSError):
-                    self.report_warning('Unable to remove downloaded video file')
+                    try:
+                        os.remove(encodeFilename(old_filename))
+                    except (IOError, OSError):
+                        self.report_warning('Unable to remove downloaded original file')
  
      def _make_archive_id(self, info_dict):
          # Future-proof against any change in case
@@ -1511,30 +1664,18 @@ class YoutubeDL(object):
          return res
  
      def list_formats(self, info_dict):
+        """Print a table of the formats available for info_dict."""
-        def line(format, idlen=20):
-            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
-                format['format_id'],
-                format['ext'],
-                self.format_resolution(format),
-                self._format_note(format),
-            ))
-
+        # Without a 'formats' list, info_dict itself is treated as the only format.
          formats = info_dict.get('formats', [info_dict])
-        idlen = max(len('format code'),
-                    max(len(f['format_id']) for f in formats))
-        formats_s = [
-            line(f, idlen) for f in formats
+        # One row per format; formats with a preference below -1000 are left out.
+        table = [
+            [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+            for f in formats
              if f.get('preference') is None or f['preference'] >= -1000]
+        # Tag the note cell of the last displayed row with '(best)'.
+        # NOTE(review): the guard counts `formats`, not `table`; if every format
+        # was filtered out above, table[-1] raises IndexError -- confirm.
          if len(formats) > 1:
-            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
-            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
+            table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
  
-        header_line = line({
-            'format_id': 'format code', 'ext': 'extension',
-            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
+        header_line = ['format code', 'extension', 'resolution', 'note']
          self.to_screen(
-            '[info] Available formats for %s:\n%s\n%s' %
-            (info_dict['id'], header_line, '\n'.join(formats_s)))
+            '[info] Available formats for %s:\n%s' %
+            (info_dict['id'], render_table(header_line, table)))
  
      def list_thumbnails(self, info_dict):
          thumbnails = info_dict.get('thumbnails')
@@ -1553,6 +1694,17 @@ class YoutubeDL(object):
              ['ID', 'width', 'height', 'URL'],
              [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
  
+    def list_subtitles(self, video_id, subtitles, name='subtitles'):
+        """Print, per language, the subtitle formats available for video_id.
+
+        subtitles maps a language to a list of format dicts (each with an
+        'ext'); name is the label used in the printed messages.
+        """
+        if not subtitles:
+            self.to_screen('%s has no %s' % (video_id, name))
+            return
+        self.to_screen('Available %s for %s:' % (name, video_id))
+        rows = []
+        for lang, formats in subtitles.items():
+            # Extensions are listed in reverse of their order in the list
+            exts = [f['ext'] for f in reversed(formats)]
+            rows.append([lang, ', '.join(exts)])
+        self.to_screen(render_table(['Language', 'formats'], rows))
+
      def urlopen(self, req):
          """ Start an HTTP download """
  
@@ -1571,7 +1723,8 @@ class YoutubeDL(object):
              if req_is_string:
                  req = url_escaped
              else:
-                req = compat_urllib_request.Request(
+                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+                req = req_type(
                      url_escaped, data=req.data, headers=req.headers,
                      origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
  
@@ -1606,15 +1759,15 @@ class YoutubeDL(object):
              out = out.decode().strip()
              if re.match('[0-9a-f]+', out):
                  self._write_string('[debug] Git HEAD: ' + out + '\n')
-        except:
+        except Exception:
              try:
                  sys.exc_clear()
-            except:
+            except Exception:
                  pass
          self._write_string('[debug] Python version %s - %s\n' % (
              platform.python_version(), platform_name()))
  
-        exe_versions = FFmpegPostProcessor.get_versions()
+        exe_versions = FFmpegPostProcessor.get_versions(self)
          exe_versions['rtmpdump'] = rtmpdump_version()
          exe_str = ', '.join(
              '%s %s' % (exe, v)
@@ -1669,13 +1822,14 @@ class YoutubeDL(object):
              # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
              if 'http' in proxies and 'https' not in proxies:
                  proxies['https'] = proxies['http']
-        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+        proxy_handler = PerRequestProxyHandler(proxies)
  
          debuglevel = 1 if self.params.get('debug_printtraffic') else 0
          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
          opener = compat_urllib_request.build_opener(
-            https_handler, proxy_handler, cookie_processor, ydlh)
+            proxy_handler, https_handler, cookie_processor, ydlh)
+
          # Delete the default user-agent header, which would otherwise apply in
          # cases where our custom HTTP handler doesn't come into play
          # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
@@ -1716,7 +1870,7 @@ class YoutubeDL(object):
              thumb_ext = determine_ext(t['url'], 'jpg')
              suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
              thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
  
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                  self.to_screen('[%s] %s: Thumbnail %sis already present' %