debian/control: Update list of supported sites.

[youtubedl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 5036289b062f9ee05ab5c872c76ab38babcb4a54..a7bf5a1b06766094cc06e85fd40c6fa64c2cc64b 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1,10 +1,11 @@
  #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
  
  from __future__ import absolute_import, unicode_literals
  
  import collections
  import contextlib
+import copy
  import datetime
  import errno
  import fileinput
@@ -23,6 +24,7 @@ import sys
  import time
  import tokenize
  import traceback
+import random
  
  from .compat import (
      compat_basestring,
@@ -130,6 +132,9 @@ class YoutubeDL(object):
      username:          Username for authentication purposes.
      password:          Password for authentication purposes.
      videopassword:     Password for accessing a video.
+    ap_mso:            Adobe Pass multiple-system operator identifier.
+    ap_username:       Multiple-system operator account username.
+    ap_password:       Multiple-system operator account password.
      usenetrc:          Use netrc for authentication instead.
      verbose:           Print additional info to stdout.
      quiet:             Do not print messages to stdout.
@@ -155,6 +160,7 @@ class YoutubeDL(object):
      playlistend:       Playlist item to end at.
      playlist_items:    Specific indices of playlist to download.
      playlistreverse:   Download playlist items in reverse order.
+    playlistrandom:    Download playlist items in random order.
      matchtitle:        Download only matching titles.
      rejecttitle:       Reject downloads for matching titles.
      logger:            Log messages to a logging.Logger instance.
@@ -196,8 +202,8 @@ class YoutubeDL(object):
      prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                         At the moment, this is only supported by YouTube.
      proxy:             URL of the proxy server to use
-    cn_verification_proxy:  URL of the proxy to use for IP address verification
-                       on Chinese sites. (Experimental)
+    geo_verification_proxy:  URL of the proxy to use for IP address verification
+                       on geo-restricted sites. (Experimental)
      socket_timeout:    Time to wait for unresponsive hosts, in seconds
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fridibi
@@ -248,7 +254,16 @@ class YoutubeDL(object):
      source_address:    (Experimental) Client-side IP address to bind to.
      call_home:         Boolean, true iff we are allowed to contact the
                         youtube-dl servers for debugging.
-    sleep_interval:    Number of seconds to sleep before each download.
+    sleep_interval:    Number of seconds to sleep before each download when
+                       used alone or a lower bound of a range for randomized
+                       sleep before each download (minimum possible number
+                       of seconds to sleep) when used along with
+                       max_sleep_interval.
+    max_sleep_interval:Upper bound of a range for randomized sleep before each
+                       download (maximum possible number of seconds to sleep).
+                       Must only be used along with sleep_interval.
+                       Actual sleep time will be a random float from range
+                       [sleep_interval; max_sleep_interval].
      listformats:       Print an overview of available video formats and exit.
      list_thumbnails:   Print a table of all thumbnails and exit.
      match_filter:      A function that gets called with the info_dict of
@@ -304,6 +319,11 @@ class YoutubeDL(object):
          self.params.update(params)
          self.cache = Cache(self)
  
+        if self.params.get('cn_verification_proxy') is not None:
+            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
+            if self.params.get('geo_verification_proxy') is None:
+                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+
          if params.get('bidi_workaround', False):
              try:
                  import pty
@@ -566,7 +586,7 @@ class YoutubeDL(object):
              if autonumber_size is None:
                  autonumber_size = 5
              autonumber_templ = '%0' + str(autonumber_size) + 'd'
-            template_dict['autonumber'] = autonumber_templ % self._num_downloads
+            template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
              if template_dict.get('playlist_index') is not None:
                  template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
              if template_dict.get('resolution') is None:
@@ -824,6 +844,9 @@ class YoutubeDL(object):
              if self.params.get('playlistreverse', False):
                  entries = entries[::-1]
  
+            if self.params.get('playlistrandom', False):
+                random.shuffle(entries)
+
              for i, entry in enumerate(entries, 1):
                  self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                  extra = {
@@ -1046,9 +1069,9 @@ class YoutubeDL(object):
              if isinstance(selector, list):
                  fs = [_build_selector_function(s) for s in selector]
  
-                def selector_function(formats):
+                def selector_function(ctx):
                      for f in fs:
-                        for format in f(formats):
+                        for format in f(ctx):
                              yield format
                  return selector_function
              elif selector.type == GROUP:
@@ -1056,17 +1079,17 @@ class YoutubeDL(object):
              elif selector.type == PICKFIRST:
                  fs = [_build_selector_function(s) for s in selector.selector]
  
-                def selector_function(formats):
+                def selector_function(ctx):
                      for f in fs:
-                        picked_formats = list(f(formats))
+                        picked_formats = list(f(ctx))
                          if picked_formats:
                              return picked_formats
                      return []
              elif selector.type == SINGLE:
                  format_spec = selector.selector
  
-                def selector_function(formats):
-                    formats = list(formats)
+                def selector_function(ctx):
+                    formats = list(ctx['formats'])
                      if not formats:
                          return
                      if format_spec == 'all':
@@ -1079,9 +1102,10 @@ class YoutubeDL(object):
                              if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                          if audiovideo_formats:
                              yield audiovideo_formats[format_idx]
-                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
-                        elif (all(f.get('acodec') != 'none' for f in formats) or
-                              all(f.get('vcodec') != 'none' for f in formats)):
+                        # for extractors with incomplete formats (audio only (soundcloud)
+                        # or video only (imgur)) we fall back to the best/worst
+                        # {video,audio}-only format
+                        elif ctx['incomplete_formats']:
                              yield formats[format_idx]
                      elif format_spec == 'bestaudio':
                          audio_formats = [
@@ -1155,17 +1179,18 @@ class YoutubeDL(object):
                      }
                  video_selector, audio_selector = map(_build_selector_function, selector.selector)
  
-                def selector_function(formats):
-                    formats = list(formats)
-                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
+                def selector_function(ctx):
+                    for pair in itertools.product(
+                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                          yield _merge(pair)
  
              filters = [self._build_format_filter(f) for f in selector.filters]
  
-            def final_selector(formats):
+            def final_selector(ctx):
+                ctx_copy = copy.deepcopy(ctx)
                  for _filter in filters:
-                    formats = list(filter(_filter, formats))
-                return selector_function(formats)
+                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+                return selector_function(ctx_copy)
              return final_selector
  
          stream = io.BytesIO(format_spec.encode('utf-8'))
@@ -1239,8 +1264,10 @@ class YoutubeDL(object):
                  info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
          if thumbnails:
              thumbnails.sort(key=lambda t: (
-                t.get('preference'), t.get('width'), t.get('height'),
-                t.get('id'), t.get('url')))
+                t.get('preference') if t.get('preference') is not None else -1,
+                t.get('width') if t.get('width') is not None else -1,
+                t.get('height') if t.get('height') is not None else -1,
+                t.get('id') if t.get('id') is not None else '', t.get('url')))
              for i, t in enumerate(thumbnails):
                  t['url'] = sanitize_url(t['url'])
                  if t.get('width') and t.get('height'):
@@ -1282,7 +1309,7 @@ class YoutubeDL(object):
                  for subtitle_format in subtitle:
                      if subtitle_format.get('url'):
                          subtitle_format['url'] = sanitize_url(subtitle_format['url'])
-                    if 'ext' not in subtitle_format:
+                    if subtitle_format.get('ext') is None:
                          subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
  
          if self.params.get('listsubtitles', False):
@@ -1317,7 +1344,7 @@ class YoutubeDL(object):
                  format['format_id'] = compat_str(i)
              else:
                  # Sanitize format_id from characters used in format selector expression
-                format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
+                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
              format_id = format['format_id']
              if format_id not in formats_dict:
                  formats_dict[format_id] = []
@@ -1337,11 +1364,11 @@ class YoutubeDL(object):
                      note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                  )
              # Automatically determine file extension if missing
-            if 'ext' not in format:
+            if format.get('ext') is None:
                  format['ext'] = determine_ext(format['url']).lower()
              # Automatically determine protocol if missing (useful for format
              # selection purposes)
-            if 'protocol' not in format:
+            if format.get('protocol') is None:
                  format['protocol'] = determine_protocol(format)
              # Add HTTP headers, so that external programs can use them from the
              # json output
@@ -1372,7 +1399,34 @@ class YoutubeDL(object):
              req_format_list.append('best')
              req_format = '/'.join(req_format_list)
          format_selector = self.build_format_selector(req_format)
-        formats_to_download = list(format_selector(formats))
+
+        # During format selection we may need access to the original format
+        # set in order to calculate some metrics or do some processing.
+        # For now we need to be able to tell whether the original formats provided
+        # by the extractor are incomplete (i.e. whether the extractor provides only
+        # video-only or only audio-only formats) for proper format selection for
+        # extractors with such incomplete formats (see
+        # https://github.com/rg3/youtube-dl/pull/5556).
+        # Since formats may be filtered during format selection and may no longer
+        # match the original formats, the results may be incorrect. Thus the original
+        # formats or pre-calculated metrics should be passed to the format selection
+        # routines as well.
+        # We pass a context object containing all necessary additional data
+        # instead of just the formats.
+        # This fixes an incorrect format selection issue (see
+        # https://github.com/rg3/youtube-dl/issues/10083).
+        incomplete_formats = (
+            # All formats are video-only or
+            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
+            # all formats are audio-only
+            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+
+        ctx = {
+            'formats': formats,
+            'incomplete_formats': incomplete_formats,
+        }
+
+        formats_to_download = list(format_selector(ctx))
          if not formats_to_download:
              raise ExtractorError('requested format not available',
                                   expected=True)
@@ -1559,7 +1613,9 @@ class YoutubeDL(object):
                          self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                      else:
                          self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                        # Use newline='' to prevent conversion of newline characters
+                        # See https://github.com/rg3/youtube-dl/issues/10268
+                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                              subfile.write(sub_data)
                  except (OSError, IOError):
                      self.report_error('Cannot write subtitles file ' + sub_filename)
@@ -1607,7 +1663,7 @@ class YoutubeDL(object):
                          video_ext, audio_ext = audio.get('ext'), video.get('ext')
                          if video_ext and audio_ext:
                              COMPATIBLE_EXTS = (
-                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                  ('webm')
                              )
                              for exts in COMPATIBLE_EXTS: