Prepare to upload

[youtubedl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 0a7f36c98a02e8401a966c95e6b8a780e8566e98..4493fd0e1aabc0452041a68262772b9b5d223d79 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -65,6 +65,7 @@ from .utils import (
      locked_file,
      make_HTTPS_handler,
      MaxDownloadsReached,
+    orderedSet,
      PagedList,
      parse_filesize,
      PerRequestProxyHandler,
@@ -87,6 +88,7 @@ from .utils import (
      version_tuple,
      write_json_file,
      write_string,
+    YoutubeDLCookieJar,
      YoutubeDLCookieProcessor,
      YoutubeDLHandler,
  )
@@ -210,7 +212,7 @@ class YoutubeDL(object):
                         At the moment, this is only supported by YouTube.
      proxy:             URL of the proxy server to use
      geo_verification_proxy:  URL of the proxy to use for IP address verification
-                       on geo-restricted sites. (Experimental)
+                       on geo-restricted sites.
      socket_timeout:    Time to wait for unresponsive hosts, in seconds
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fridibi
@@ -258,7 +260,7 @@ class YoutubeDL(object):
                         - "warn": only emit a warning
                         - "detect_or_warn": check whether we can do anything
                                             about it, warn otherwise (default)
-    source_address:    (Experimental) Client-side IP address to bind to.
+    source_address:    Client-side IP address to bind to.
      call_home:         Boolean, true iff we are allowed to contact the
                         youtube-dl servers for debugging.
      sleep_interval:    Number of seconds to sleep before each download when
@@ -280,11 +282,14 @@ class YoutubeDL(object):
                         match_filter_func in utils.py is one example for this.
      no_color:          Do not emit color codes in output.
      geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
-                       HTTP header (experimental)
+                       HTTP header
      geo_bypass_country:
                         Two-letter ISO 3166-2 country code that will be used for
                         explicit geographic restriction bypassing via faking
-                       X-Forwarded-For HTTP header (experimental)
+                       X-Forwarded-For HTTP header
+    geo_bypass_ip_block:
+                       IP range in CIDR notation that will be used similarly to
+                       geo_bypass_country
  
      The following options determine which downloader is picked:
      external_downloader: Executable of the external downloader to call.
@@ -297,13 +302,20 @@ class YoutubeDL(object):
      the downloader (see youtube_dl/downloader/common.py):
      nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
      noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize, external_downloader_args, hls_use_mpegts.
+    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+    http_chunk_size.
  
      The following options are used by the post processors:
-    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
-                       otherwise prefer avconv.
+    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
+                       otherwise prefer ffmpeg.
      postprocessor_args: A list of additional command-line arguments for the
                          postprocessor.
+
+    The following options are used by the Youtube extractor:
+    youtube_include_dash_manifest: If True (default), DASH manifests and related
+                        data will be downloaded and processed by the extractor.
+                        You can reduce network I/O by disabling it if you don't
+                        care about DASH.
      """
  
      _NUMERIC_FIELDS = set((
@@ -524,6 +536,8 @@ class YoutubeDL(object):
      def save_console_title(self):
          if not self.params.get('consoletitle', False):
              return
+        if self.params.get('simulate', False):
+            return
          if compat_os_name != 'nt' and 'TERM' in os.environ:
              # Save the title on stack
              self._write_string('\033[22;0t', self._screen_file)
@@ -531,6 +545,8 @@ class YoutubeDL(object):
      def restore_console_title(self):
          if not self.params.get('consoletitle', False):
              return
+        if self.params.get('simulate', False):
+            return
          if compat_os_name != 'nt' and 'TERM' in os.environ:
              # Restore the title from stack
              self._write_string('\033[23;0t', self._screen_file)
@@ -543,7 +559,7 @@ class YoutubeDL(object):
          self.restore_console_title()
  
          if self.params.get('cookiefile') is not None:
-            self.cookiejar.save()
+            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
  
      def trouble(self, message=None, tb=None):
          """Determine action to take when a download problem appears.
@@ -902,15 +918,25 @@ class YoutubeDL(object):
                                  yield int(item)
                          else:
                              yield int(string_segment)
-                playlistitems = iter_playlistitems(playlistitems_str)
+                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
  
              ie_entries = ie_result['entries']
+
+            def make_playlistitems_entries(list_ie_entries):
+                num_entries = len(list_ie_entries)
+                return [
+                    list_ie_entries[i - 1] for i in playlistitems
+                    if -num_entries <= i - 1 < num_entries]
+
+            def report_download(num_entries):
+                self.to_screen(
+                    '[%s] playlist %s: Downloading %d videos' %
+                    (ie_result['extractor'], playlist, num_entries))
+
              if isinstance(ie_entries, list):
                  n_all_entries = len(ie_entries)
                  if playlistitems:
-                    entries = [
-                        ie_entries[i - 1] for i in playlistitems
-                        if -n_all_entries <= i - 1 < n_all_entries]
+                    entries = make_playlistitems_entries(ie_entries)
                  else:
                      entries = ie_entries[playliststart:playlistend]
                  n_entries = len(entries)
@@ -928,20 +954,16 @@ class YoutubeDL(object):
                      entries = ie_entries.getslice(
                          playliststart, playlistend)
                  n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, n_entries))
+                report_download(n_entries)
              else:  # iterable
                  if playlistitems:
-                    entry_list = list(ie_entries)
-                    entries = [entry_list[i - 1] for i in playlistitems]
+                    entries = make_playlistitems_entries(list(itertools.islice(
+                        ie_entries, 0, max(playlistitems))))
                  else:
                      entries = list(itertools.islice(
                          ie_entries, playliststart, playlistend))
                  n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, n_entries))
+                report_download(n_entries)
  
              if self.params.get('playlistreverse', False):
                  entries = entries[::-1]
@@ -962,6 +984,8 @@ class YoutubeDL(object):
                      'playlist': playlist,
                      'playlist_id': ie_result.get('id'),
                      'playlist_title': ie_result.get('title'),
+                    'playlist_uploader': ie_result.get('uploader'),
+                    'playlist_uploader_id': ie_result.get('uploader_id'),
                      'playlist_index': i + playliststart,
                      'extractor': ie_result['extractor'],
                      'webpage_url': ie_result['webpage_url'],
@@ -1017,7 +1041,7 @@ class YoutubeDL(object):
              '!=': operator.ne,
          }
          operator_rex = re.compile(r'''(?x)\s*
-            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
+            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
              \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
              (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
              $
@@ -1066,22 +1090,27 @@ class YoutubeDL(object):
          return _filter
  
      def _default_format_spec(self, info_dict, download=True):
-        req_format_list = []
  
-        def can_have_partial_formats():
+        def can_merge():
+            merger = FFmpegMergerPP(self)
+            return merger.available and merger.can_merge()
+
+        def prefer_best():
              if self.params.get('simulate', False):
-                return True
+                return False
              if not download:
-                return True
-            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
                  return False
+            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+                return True
              if info_dict.get('is_live'):
-                return False
-            merger = FFmpegMergerPP(self)
-            return merger.available and merger.can_merge()
-        if can_have_partial_formats():
-            req_format_list.append('bestvideo+bestaudio')
-        req_format_list.append('best')
+                return True
+            if not can_merge():
+                return True
+            return False
+
+        req_format_list = ['bestvideo+bestaudio', 'best']
+        if prefer_best():
+            req_format_list.reverse()
          return '/'.join(req_format_list)
  
      def build_format_selector(self, format_spec):
@@ -1454,23 +1483,28 @@ class YoutubeDL(object):
              if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                  info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
  
+        for cc_kind in ('subtitles', 'automatic_captions'):
+            cc = info_dict.get(cc_kind)
+            if cc:
+                for _, subtitle in cc.items():
+                    for subtitle_format in subtitle:
+                        if subtitle_format.get('url'):
+                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+                        if subtitle_format.get('ext') is None:
+                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+        automatic_captions = info_dict.get('automatic_captions')
          subtitles = info_dict.get('subtitles')
-        if subtitles:
-            for _, subtitle in subtitles.items():
-                for subtitle_format in subtitle:
-                    if subtitle_format.get('url'):
-                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
-                    if subtitle_format.get('ext') is None:
-                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
  
          if self.params.get('listsubtitles', False):
              if 'automatic_captions' in info_dict:
-                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+                self.list_subtitles(
+                    info_dict['id'], automatic_captions, 'automatic captions')
              self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
              return
+
          info_dict['requested_subtitles'] = self.process_subtitles(
-            info_dict['id'], subtitles,
-            info_dict.get('automatic_captions'))
+            info_dict['id'], subtitles, automatic_captions)
  
          # We now pick which formats have to be downloaded
          if info_dict.get('formats') is None:
@@ -1828,7 +1862,7 @@ class YoutubeDL(object):
                      def compatible_formats(formats):
                          video, audio = formats
                          # Check extension
-                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
+                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
                          if video_ext and audio_ext:
                              COMPATIBLE_EXTS = (
                                  ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
@@ -2213,8 +2247,16 @@ class YoutubeDL(object):
                  sys.exc_clear()
              except Exception:
                  pass
-        self._write_string('[debug] Python version %s - %s\n' % (
-            platform.python_version(), platform_name()))
+
+        def python_implementation():
+            impl_name = platform.python_implementation()
+            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
+            return impl_name
+
+        self._write_string('[debug] Python version %s (%s) - %s\n' % (
+            platform.python_version(), python_implementation(),
+            platform_name()))
  
          exe_versions = FFmpegPostProcessor.get_versions(self)
          exe_versions['rtmpdump'] = rtmpdump_version()
@@ -2256,10 +2298,9 @@ class YoutubeDL(object):
              self.cookiejar = compat_cookiejar.CookieJar()
          else:
              opts_cookiefile = expand_path(opts_cookiefile)
-            self.cookiejar = compat_cookiejar.MozillaCookieJar(
-                opts_cookiefile)
+            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
              if os.access(opts_cookiefile, os.R_OK):
-                self.cookiejar.load()
+                self.cookiejar.load(ignore_discard=True, ignore_expires=True)
  
          cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
          if opts_proxy is not None: