debian/changelog: Start new release.

[youtubedl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

old mode 100644 (file)

new mode 100755 (executable)

index d40314e..3dff723
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,9 +4,11 @@
  from __future__ import absolute_import, unicode_literals
  
  import collections
+import datetime
  import errno
  import io
  import json
+import locale
  import os
  import platform
  import re
@@ -29,6 +31,7 @@ from .utils import (
      ContentTooShortError,
      date_from_str,
      DateRange,
+    DEFAULT_OUTTMPL,
      determine_ext,
      DownloadError,
      encodeFilename,
@@ -39,6 +42,7 @@ from .utils import (
      locked_file,
      make_HTTPS_handler,
      MaxDownloadsReached,
+    PagedList,
      PostProcessingError,
      platform_name,
      preferredencoding,
@@ -92,6 +96,7 @@ class YoutubeDL(object):
      usenetrc:          Use netrc for authentication instead.
      verbose:           Print additional info to stdout.
      quiet:             Do not print messages to stdout.
+    no_warnings:       Do not print out anything for warnings.
      forceurl:          Force printing final URL.
      forcetitle:        Force printing title.
      forceid:           Force printing ID.
@@ -146,11 +151,17 @@ class YoutubeDL(object):
                         again.
      cookiefile:        File name where cookies should be read from and dumped to.
      nocheckcertificate:Do not verify SSL certificates
+    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
+                       At the moment, this is only supported by YouTube.
      proxy:             URL of the proxy server to use
      socket_timeout:    Time to wait for unresponsive hosts, in seconds
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fribidi
      debug_printtraffic:Print out sent and received HTTP traffic
+    include_ads:       Download ads as well
+    default_search:    Prepend this string if an input url is not valid.
+                       'auto' for elaborate guessing
+    encoding:          Use this encoding instead of the system-specified.
  
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
@@ -276,6 +287,9 @@ class YoutubeDL(object):
          """Print message to stdout if not in quiet mode."""
          return self.to_stdout(message, skip_eol, check_quiet=True)
  
+    def _write_string(self, s, out=None):
+        write_string(s, out=out, encoding=self.params.get('encoding'))
+
      def to_stdout(self, message, skip_eol=False, check_quiet=False):
          """Print message to stdout if not in quiet mode."""
          if self.params.get('logger'):
@@ -285,7 +299,7 @@ class YoutubeDL(object):
              terminator = ['\n', ''][skip_eol]
              output = message + terminator
  
-            write_string(output, self._screen_file)
+            self._write_string(output, self._screen_file)
  
      def to_stderr(self, message):
          """Print message to stderr."""
@@ -295,7 +309,7 @@ class YoutubeDL(object):
          else:
              message = self._bidi_workaround(message)
              output = message + '\n'
-            write_string(output, self._err_file)
+            self._write_string(output, self._err_file)
  
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
@@ -305,21 +319,21 @@ class YoutubeDL(object):
              # already of type unicode()
              ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
          elif 'TERM' in os.environ:
-            write_string('\033]0;%s\007' % message, self._screen_file)
+            self._write_string('\033]0;%s\007' % message, self._screen_file)
  
      def save_console_title(self):
          if not self.params.get('consoletitle', False):
              return
          if 'TERM' in os.environ:
              # Save the title on stack
-            write_string('\033[22;0t', self._screen_file)
+            self._write_string('\033[22;0t', self._screen_file)
  
      def restore_console_title(self):
          if not self.params.get('consoletitle', False):
              return
          if 'TERM' in os.environ:
              # Restore the title from stack
-            write_string('\033[23;0t', self._screen_file)
+            self._write_string('\033[23;0t', self._screen_file)
  
      def __enter__(self):
          self.save_console_title()
@@ -327,7 +341,7 @@ class YoutubeDL(object):
  
      def __exit__(self, *args):
          self.restore_console_title()
-    
+
          if self.params.get('cookiefile') is not None:
              self.cookiejar.save()
  
@@ -366,12 +380,17 @@ class YoutubeDL(object):
          Print the message to stderr, it will be prefixed with 'WARNING:'
          If stderr is a tty file the 'WARNING:' will be colored
          '''
-        if self._err_file.isatty() and os.name != 'nt':
-            _msg_header = '\033[0;33mWARNING:\033[0m'
+        if self.params.get('logger') is not None:
+            self.params['logger'].warning(message)
          else:
-            _msg_header = 'WARNING:'
-        warning_message = '%s %s' % (_msg_header, message)
-        self.to_stderr(warning_message)
+            if self.params.get('no_warnings'):
+                return
+            if self._err_file.isatty() and os.name != 'nt':
+                _msg_header = '\033[0;33mWARNING:\033[0m'
+            else:
+                _msg_header = 'WARNING:'
+            warning_message = '%s %s' % (_msg_header, message)
+            self.to_stderr(warning_message)
  
      def report_error(self, message, tb=None):
          '''
@@ -392,10 +411,6 @@ class YoutubeDL(object):
          except UnicodeEncodeError:
              self.to_screen('[download] The file has already been downloaded')
  
-    def increment_downloads(self):
-        """Increment the ordinal that assigns a number to each file."""
-        self._num_downloads += 1
-
      def prepare_filename(self, info_dict):
          """Generate the output filename."""
          try:
@@ -409,6 +424,13 @@ class YoutubeDL(object):
              template_dict['autonumber'] = autonumber_templ % self._num_downloads
              if template_dict.get('playlist_index') is not None:
                  template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+            if template_dict.get('resolution') is None:
+                if template_dict.get('width') and template_dict.get('height'):
+                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+                elif template_dict.get('height'):
+                    template_dict['resolution'] = '%sp' % template_dict['height']
+                elif template_dict.get('width'):
+                    template_dict['resolution'] = '?x%d' % template_dict['width']
  
              sanitize = lambda k, v: sanitize_filename(
                  compat_str(v),
@@ -419,7 +441,8 @@ class YoutubeDL(object):
                                   if v is not None)
              template_dict = collections.defaultdict(lambda: 'NA', template_dict)
  
-            tmpl = os.path.expanduser(self.params['outtmpl'])
+            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+            tmpl = os.path.expanduser(outtmpl)
              filename = tmpl % template_dict
              return filename
          except ValueError as err:
@@ -499,13 +522,7 @@ class YoutubeDL(object):
                          '_type': 'compat_list',
                          'entries': ie_result,
                      }
-                self.add_extra_info(ie_result,
-                    {
-                        'extractor': ie.IE_NAME,
-                        'webpage_url': url,
-                        'webpage_url_basename': url_basename(url),
-                        'extractor_key': ie.ie_key(),
-                    })
+                self.add_default_extra_info(ie_result, ie, url)
                  if process:
                      return self.process_ie_result(ie_result, download, extra_info)
                  else:
@@ -513,6 +530,8 @@ class YoutubeDL(object):
              except ExtractorError as de: # An error we somewhat expected
                  self.report_error(compat_str(de), de.format_traceback())
                  break
+            except MaxDownloadsReached:
+                raise
              except Exception as e:
                  if self.params.get('ignoreerrors', False):
                      self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
@@ -520,7 +539,15 @@ class YoutubeDL(object):
                  else:
                      raise
          else:
-            self.report_error('no suitable InfoExtractor: %s' % url)
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+    def add_default_extra_info(self, ie_result, ie, url):
+        self.add_extra_info(ie_result, {
+            'extractor': ie.IE_NAME,
+            'webpage_url': url,
+            'webpage_url_basename': url_basename(url),
+            'extractor_key': ie.ie_key(),
+        })
  
      def process_ie_result(self, ie_result, download=True, extra_info={}):
          """
@@ -575,19 +602,27 @@ class YoutubeDL(object):
  
              playlist_results = []
  
-            n_all_entries = len(ie_result['entries'])
              playliststart = self.params.get('playliststart', 1) - 1
              playlistend = self.params.get('playlistend', None)
              # For backwards compatibility, interpret -1 as whole list
              if playlistend == -1:
                  playlistend = None
  
-            entries = ie_result['entries'][playliststart:playlistend]
-            n_entries = len(entries)
-
-            self.to_screen(
-                "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
-                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+            if isinstance(ie_result['entries'], list):
+                n_all_entries = len(ie_result['entries'])
+                entries = ie_result['entries'][playliststart:playlistend]
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
+                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
+            else:
+                assert isinstance(ie_result['entries'], PagedList)
+                entries = ie_result['entries'].getslice(
+                    playliststart, playlistend)
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Downloading %d videos" %
+                    (ie_result['extractor'], playlist, n_entries))
  
              for i, entry in enumerate(entries, 1):
                  self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@@ -634,6 +669,30 @@ class YoutubeDL(object):
              return available_formats[-1]
          elif format_spec == 'worst':
              return available_formats[0]
+        elif format_spec == 'bestaudio':
+            audio_formats = [
+                f for f in available_formats
+                if f.get('vcodec') == 'none']
+            if audio_formats:
+                return audio_formats[-1]
+        elif format_spec == 'worstaudio':
+            audio_formats = [
+                f for f in available_formats
+                if f.get('vcodec') == 'none']
+            if audio_formats:
+                return audio_formats[0]
+        elif format_spec == 'bestvideo':
+            video_formats = [
+                f for f in available_formats
+                if f.get('acodec') == 'none']
+            if video_formats:
+                return video_formats[-1]
+        elif format_spec == 'worstvideo':
+            video_formats = [
+                f for f in available_formats
+                if f.get('acodec') == 'none']
+            if video_formats:
+                return video_formats[0]
          else:
              extensions = ['mp4', 'flv', 'webm', '3gp']
              if format_spec in extensions:
@@ -648,11 +707,35 @@ class YoutubeDL(object):
      def process_video_result(self, info_dict, download=True):
          assert info_dict.get('_type', 'video') == 'video'
  
+        if 'id' not in info_dict:
+            raise ExtractorError('Missing "id" field in extractor result')
+        if 'title' not in info_dict:
+            raise ExtractorError('Missing "title" field in extractor result')
+
          if 'playlist' not in info_dict:
              # It isn't part of a playlist
              info_dict['playlist'] = None
              info_dict['playlist_index'] = None
  
+        thumbnails = info_dict.get('thumbnails')
+        if thumbnails:
+            thumbnails.sort(key=lambda t: (
+                t.get('width'), t.get('height'), t.get('url')))
+            for t in thumbnails:
+                if 'width' in t and 'height' in t:
+                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
+
+        if thumbnails and 'thumbnail' not in info_dict:
+            info_dict['thumbnail'] = thumbnails[-1]['url']
+
+        if 'display_id' not in info_dict and 'id' in info_dict:
+            info_dict['display_id'] = info_dict['id']
+
+        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+            upload_date = datetime.datetime.utcfromtimestamp(
+                info_dict['timestamp'])
+            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+
          # These extractors handle format selection themselves
          if info_dict['extractor'] in ['Youku']:
              if download:
@@ -666,8 +749,14 @@ class YoutubeDL(object):
          else:
              formats = info_dict['formats']
  
+        if not formats:
+            raise ExtractorError('No video formats found!')
+
          # We check that all the formats have the format and format_id fields
-        for (i, format) in enumerate(formats):
+        for i, format in enumerate(formats):
+            if 'url' not in format:
+                raise ExtractorError('Missing "url" key in result (index %d)' % i)
+
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
              if format.get('format') is None:
@@ -678,7 +767,7 @@ class YoutubeDL(object):
                  )
              # Automatically determine file extension if missing
              if 'ext' not in format:
-                format['ext'] = determine_ext(format['url'])
+                format['ext'] = determine_ext(format['url']).lower()
  
          format_limit = self.params.get('format_limit', None)
          if format_limit:
@@ -688,17 +777,17 @@ class YoutubeDL(object):
  
          # TODO Central sorting goes here
  
-        if formats[0] is not info_dict: 
+        if formats[0] is not info_dict:
              # only set the 'formats' fields if the original info_dict list them
              # otherwise we end up with a circular reference, the first (and unique)
-            # element in the 'formats' field in info_dict is info_dict itself, 
+            # element in the 'formats' field in info_dict is info_dict itself,
              # which can't be exported to json
              info_dict['formats'] = formats
          if self.params.get('listformats', None):
              self.list_formats(info_dict)
              return
  
-        req_format = self.params.get('format', 'best')
+        req_format = self.params.get('format')
          if req_format is None:
              req_format = 'best'
          formats_to_download = []
@@ -747,8 +836,11 @@ class YoutubeDL(object):
          """Process a single resolved IE result."""
  
          assert info_dict.get('_type', 'video') == 'video'
-        #We increment the download the download count here to match the previous behaviour.
-        self.increment_downloads()
+
+        max_downloads = self.params.get('max_downloads')
+        if max_downloads is not None:
+            if self._num_downloads >= int(max_downloads):
+                raise MaxDownloadsReached()
  
          info_dict['fulltitle'] = info_dict['title']
          if len(info_dict['title']) > 200:
@@ -765,10 +857,7 @@ class YoutubeDL(object):
              self.to_screen('[download] ' + reason)
              return
  
-        max_downloads = self.params.get('max_downloads')
-        if max_downloads is not None:
-            if self._num_downloads > int(max_downloads):
-                raise MaxDownloadsReached()
+        self._num_downloads += 1
  
          filename = self.prepare_filename(info_dict)
  
@@ -803,7 +892,7 @@ class YoutubeDL(object):
  
          try:
              dn = os.path.dirname(encodeFilename(filename))
-            if dn != '' and not os.path.exists(dn):
+            if dn and not os.path.exists(dn):
                  os.makedirs(dn)
          except (OSError, IOError) as err:
              self.report_error('unable to create directory ' + compat_str(err))
@@ -860,7 +949,7 @@ class YoutubeDL(object):
                          with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                  subfile.write(sub)
                  except (OSError, IOError):
-                    self.report_error('Cannot write subtitles file ' + descfn)
+                    self.report_error('Cannot write subtitles file ' + sub_filename)
                      return
  
          if self.params.get('writeinfojson', False):
@@ -886,7 +975,7 @@ class YoutubeDL(object):
                      self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                     (info_dict['extractor'], info_dict['id']))
                      try:
-                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                        uf = self.urlopen(info_dict['thumbnail'])
                          with open(thumb_filename, 'wb') as thumbf:
                              shutil.copyfileobj(uf, thumbf)
                          self.to_screen('[%s] %s: Writing thumbnail to: %s' %
@@ -904,10 +993,20 @@ class YoutubeDL(object):
                          fd = get_suitable_downloader(info)(self, self.params)
                          for ph in self._progress_hooks:
                              fd.add_progress_hook(ph)
+                        if self.params.get('verbose'):
+                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                          return fd.download(name, info)
                      if info_dict.get('requested_formats') is not None:
                          downloaded = []
                          success = True
+                        merger = FFmpegMergerPP(self)
+                        if not merger._get_executable():
+                            postprocessors = []
+                            self.report_warning('You have requested multiple '
+                                'formats but ffmpeg or avconv are not installed.'
+                                ' The formats won\'t be merged')
+                        else:
+                            postprocessors = [merger]
                          for f in info_dict['requested_formats']:
                              new_info = dict(info_dict)
                              new_info.update(f)
@@ -916,7 +1015,7 @@ class YoutubeDL(object):
                              downloaded.append(fname)
                              partial_success = dl(fname, new_info)
                              success = success and partial_success
-                        info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
+                        info_dict['__postprocessors'] = postprocessors
                          info_dict['__files_to_merge'] = downloaded
                      else:
                          # Just a single file
@@ -941,10 +1040,11 @@ class YoutubeDL(object):
  
      def download(self, url_list):
          """Download a given list of URLs."""
+        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
          if (len(url_list) > 1 and
-                '%' not in self.params['outtmpl']
+                '%' not in outtmpl
                  and self.params.get('max_downloads') != 1):
-            raise SameFileError(self.params['outtmpl'])
+            raise SameFileError(outtmpl)
  
          for url in url_list:
              try:
@@ -1055,46 +1155,57 @@ class YoutubeDL(object):
              res = default
          return res
  
-    def list_formats(self, info_dict):
-        def format_note(fdict):
-            res = ''
-            if fdict.get('ext') in ['f4f', 'f4m']:
-                res += '(unsupported) '
-            if fdict.get('format_note') is not None:
-                res += fdict['format_note'] + ' '
-            if fdict.get('tbr') is not None:
-                res += '%4dk ' % fdict['tbr']
-            if (fdict.get('vcodec') is not None and
-                    fdict.get('vcodec') != 'none'):
-                res += '%-5s' % fdict['vcodec']
-                if fdict.get('vbr') is not None:
-                    res += '@'
-            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
-                res += 'video@'
+    def _format_note(self, fdict):
+        res = ''
+        if fdict.get('ext') in ['f4f', 'f4m']:
+            res += '(unsupported) '
+        if fdict.get('format_note') is not None:
+            res += fdict['format_note'] + ' '
+        if fdict.get('tbr') is not None:
+            res += '%4dk ' % fdict['tbr']
+        if fdict.get('container') is not None:
+            if res:
+                res += ', '
+            res += '%s container' % fdict['container']
+        if (fdict.get('vcodec') is not None and
+                fdict.get('vcodec') != 'none'):
+            if res:
+                res += ', '
+            res += fdict['vcodec']
              if fdict.get('vbr') is not None:
-                res += '%4dk' % fdict['vbr']
-            if fdict.get('acodec') is not None:
-                if res:
-                    res += ', '
+                res += '@'
+        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+            res += 'video@'
+        if fdict.get('vbr') is not None:
+            res += '%4dk' % fdict['vbr']
+        if fdict.get('acodec') is not None:
+            if res:
+                res += ', '
+            if fdict['acodec'] == 'none':
+                res += 'video only'
+            else:
                  res += '%-5s' % fdict['acodec']
-            elif fdict.get('abr') is not None:
-                if res:
-                    res += ', '
-                res += 'audio'
-            if fdict.get('abr') is not None:
-                res += '@%3dk' % fdict['abr']
-            if fdict.get('filesize') is not None:
-                if res:
-                    res += ', '
-                res += format_bytes(fdict['filesize'])
-            return res
+        elif fdict.get('abr') is not None:
+            if res:
+                res += ', '
+            res += 'audio'
+        if fdict.get('abr') is not None:
+            res += '@%3dk' % fdict['abr']
+        if fdict.get('asr') is not None:
+            res += ' (%5dHz)' % fdict['asr']
+        if fdict.get('filesize') is not None:
+            if res:
+                res += ', '
+            res += format_bytes(fdict['filesize'])
+        return res
  
+    def list_formats(self, info_dict):
          def line(format, idlen=20):
              return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                  format['format_id'],
                  format['ext'],
                  self.format_resolution(format),
-                format_note(format),
+                self._format_note(format),
              ))
  
          formats = info_dict.get('formats', [info_dict])
@@ -1102,8 +1213,8 @@ class YoutubeDL(object):
                      max(len(f['format_id']) for f in formats))
          formats_s = [line(f, idlen) for f in formats]
          if len(formats) > 1:
-            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
-            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
+            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
+            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
  
          header_line = line({
              'format_id': 'format code', 'ext': 'extension',
@@ -1113,12 +1224,22 @@ class YoutubeDL(object):
  
      def urlopen(self, req):
          """ Start an HTTP download """
-        return self._opener.open(req)
+        return self._opener.open(req, timeout=self._socket_timeout)
  
      def print_debug_header(self):
          if not self.params.get('verbose'):
              return
-        write_string('[debug] youtube-dl version ' + __version__ + '\n')
+
+        write_string(
+            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+                locale.getpreferredencoding(),
+                sys.getfilesystemencoding(),
+                sys.stdout.encoding,
+                self.get_encoding()),
+            encoding=None
+        )
+
+        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
          try:
              sp = subprocess.Popen(
                  ['git', 'rev-parse', '--short', 'HEAD'],
@@ -1127,24 +1248,24 @@ class YoutubeDL(object):
              out, err = sp.communicate()
              out = out.decode().strip()
              if re.match('[0-9a-f]+', out):
-                write_string('[debug] Git HEAD: ' + out + '\n')
+                self._write_string('[debug] Git HEAD: ' + out + '\n')
          except:
              try:
                  sys.exc_clear()
              except:
                  pass
-        write_string('[debug] Python version %s - %s' %
+        self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')
  
          proxy_map = {}
          for handler in self._opener.handlers:
              if hasattr(handler, 'proxies'):
                  proxy_map.update(handler.proxies)
-        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
  
      def _setup_opener(self):
          timeout_val = self.params.get('socket_timeout')
-        timeout = 600 if timeout_val is None else float(timeout_val)
+        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
  
          opts_cookiefile = self.params.get('cookiefile')
          opts_proxy = self.params.get('proxy')
@@ -1183,6 +1304,18 @@ class YoutubeDL(object):
          opener.addheaders = []
          self._opener = opener
  
-        # TODO remove this global modification
-        compat_urllib_request.install_opener(opener)
-        socket.setdefaulttimeout(timeout)
+    def encode(self, s):
+        if isinstance(s, bytes):
+            return s  # Already encoded
+
+        try:
+            return s.encode(self.get_encoding())
+        except UnicodeEncodeError as err:
+            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+            raise
+
+    def get_encoding(self):
+        encoding = self.params.get('encoding')
+        if encoding is None:
+            encoding = preferredencoding()
+        return encoding