debian/patches: Drop patch included in version 2017.02.07.

[youtubedl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index b3b687a314681de17547104f7f99e883a312c322..67a847ebad8238fc4f368f46b336b80e6caa3673 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,5 +1,5 @@
  #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
  
  from __future__ import unicode_literals
  
@@ -42,6 +42,7 @@ from .compat import (
      compat_html_entities_html5,
      compat_http_client,
      compat_kwargs,
+    compat_os_name,
      compat_parse_qs,
      compat_shlex_quote,
      compat_socket_create_connection,
@@ -85,12 +86,24 @@ std_headers = {
  }
  
  
+# Canned User-Agent strings, keyed by browser name.
+USER_AGENTS = {
+    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
  NO_DEFAULT = object()
  
  ENGLISH_MONTH_NAMES = [
      'January', 'February', 'March', 'April', 'May', 'June',
      'July', 'August', 'September', 'October', 'November', 'December']
  
+# Month names per language code, listed January first so that
+# (list index + 1) is the month number. month_by_name() looks names up here
+# and falls back to the 'en' list when the language code is unknown.
+MONTH_NAMES = {
+    'en': ENGLISH_MONTH_NAMES,
+    'fr': [
+        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
  KNOWN_EXTENSIONS = (
      'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
      'flv', 'f4v', 'f4a', 'f4b',
@@ -115,7 +128,13 @@ DATE_FORMATS = (
      '%d %B %Y',
      '%d %b %Y',
      '%B %d %Y',
+    '%B %dst %Y',
+    '%B %dnd %Y',
+    '%B %dth %Y',
      '%b %d %Y',
+    '%b %dst %Y',
+    '%b %dnd %Y',
+    '%b %dth %Y',
      '%b %dst %Y %I:%M',
      '%b %dnd %Y %I:%M',
      '%b %dth %Y %I:%M',
@@ -124,6 +143,7 @@ DATE_FORMATS = (
      '%Y/%m/%d',
      '%Y/%m/%d %H:%M',
      '%Y/%m/%d %H:%M:%S',
+    '%Y-%m-%d %H:%M',
      '%Y-%m-%d %H:%M:%S',
      '%Y-%m-%d %H:%M:%S.%f',
      '%d.%m.%Y %H:%M',
@@ -134,6 +154,8 @@ DATE_FORMATS = (
      '%Y-%m-%dT%H:%M:%S',
      '%Y-%m-%dT%H:%M:%S.%f',
      '%Y-%m-%dT%H:%M',
+    '%b %d %Y at %H:%M',
+    '%b %d %Y at %H:%M:%S',
  )
  
  DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -155,6 +177,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
      '%m/%d/%Y %H:%M:%S',
  ])
  
+# Regex searched for by decode_packed_codes(); its four groups are unpacked
+# there, in order, as (obfuscated code, base, count, '|'-separated symbols).
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+
  
  def preferredencoding():
      """Get preferred encoding.
@@ -484,7 +508,7 @@ def sanitize_path(s):
      if drive_or_unc:
          norm_path.pop(0)
      sanitized_path = [
-        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)
+        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
          for path_part in norm_path]
      if drive_or_unc:
          sanitized_path.insert(0, drive_or_unc + os.path.sep)
@@ -768,6 +792,26 @@ class ContentTooShortError(Exception):
          self.expected = expected
  
  
+class XAttrMetadataError(Exception):
+    """Raised when writing an extended attribute fails.
+
+    Attributes:
+        code: the value passed by the raiser -- an errno or, for the
+            command-line backends, the process exit status; may be None.
+        msg: the message passed by the raiser; may be None.
+        reason: one of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED',
+            derived from ``code`` and ``msg``.
+    """
+
+    def __init__(self, code=None, msg='Unknown error'):
+        super(XAttrMetadataError, self).__init__(msg)
+        self.code = code
+        self.msg = msg
+
+        # ``msg`` may be None (an EnvironmentError without strerror); a
+        # substring test against None would raise TypeError.
+        text = msg or ''
+
+        # Classify by error code first, then by message text, because an
+        # exit status carries no errno information.
+        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+                'No space left' in text or 'Disk quota exceeded' in text):
+            self.reason = 'NO_SPACE'
+        elif self.code == errno.E2BIG or 'Argument list too long' in text:
+            self.reason = 'VALUE_TOO_LONG'
+        else:
+            self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(Exception):
+    """Raised when no usable mechanism for writing xattrs is available."""
+
+
  def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
      # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
      # expected HTTP responses to meet HTTP/1.0 or later (see also
@@ -1146,7 +1190,7 @@ def date_from_str(date_str):
          return today
      if date_str == 'yesterday':
          return today - datetime.timedelta(days=1)
-    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
          time = int(match.group('time'))
@@ -1504,38 +1548,63 @@ def parse_filesize(s):
      _UNIT_TABLE = {
          'B': 1,
          'b': 1,
+        'bytes': 1,
          'KiB': 1024,
          'KB': 1000,
          'kB': 1024,
          'Kb': 1000,
+        'kb': 1000,
+        'kilobytes': 1000,
+        'kibibytes': 1024,
          'MiB': 1024 ** 2,
          'MB': 1000 ** 2,
          'mB': 1024 ** 2,
          'Mb': 1000 ** 2,
+        'mb': 1000 ** 2,
+        'megabytes': 1000 ** 2,
+        'mebibytes': 1024 ** 2,
          'GiB': 1024 ** 3,
          'GB': 1000 ** 3,
          'gB': 1024 ** 3,
          'Gb': 1000 ** 3,
+        'gb': 1000 ** 3,
+        'gigabytes': 1000 ** 3,
+        'gibibytes': 1024 ** 3,
          'TiB': 1024 ** 4,
          'TB': 1000 ** 4,
          'tB': 1024 ** 4,
          'Tb': 1000 ** 4,
+        'tb': 1000 ** 4,
+        'terabytes': 1000 ** 4,
+        'tebibytes': 1024 ** 4,
          'PiB': 1024 ** 5,
          'PB': 1000 ** 5,
          'pB': 1024 ** 5,
          'Pb': 1000 ** 5,
+        'pb': 1000 ** 5,
+        'petabytes': 1000 ** 5,
+        'pebibytes': 1024 ** 5,
          'EiB': 1024 ** 6,
          'EB': 1000 ** 6,
          'eB': 1024 ** 6,
          'Eb': 1000 ** 6,
+        'eb': 1000 ** 6,
+        'exabytes': 1000 ** 6,
+        'exbibytes': 1024 ** 6,
          'ZiB': 1024 ** 7,
          'ZB': 1000 ** 7,
          'zB': 1024 ** 7,
          'Zb': 1000 ** 7,
+        'zb': 1000 ** 7,
+        'zettabytes': 1000 ** 7,
+        'zebibytes': 1024 ** 7,
          'YiB': 1024 ** 8,
          'YB': 1000 ** 8,
          'yB': 1024 ** 8,
          'Yb': 1000 ** 8,
+        'yb': 1000 ** 8,
+        'yottabytes': 1000 ** 8,
+        'yobibytes': 1024 ** 8,
      }
  
      return lookup_unit_table(_UNIT_TABLE, s)
@@ -1562,11 +1631,13 @@ def parse_count(s):
      return lookup_unit_table(_UNIT_TABLE, s)
  
  
-def month_by_name(name):
+def month_by_name(name, lang='en'):
      """ Return the number of a month by (locale-independently) English name """
  
+    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
      try:
-        return ENGLISH_MONTH_NAMES.index(name) + 1
+        return month_names.index(name) + 1
      except ValueError:
          return None
  
@@ -1632,6 +1703,20 @@ def url_basename(url):
      return path.strip('/').split('/')[-1]
  
  
+def base_url(url):
+    """Return the leading part of an http(s) `url`, up to and including the
+    last '/' that precedes any '?', '#' or '&'.
+
+    Raises AttributeError when `url` does not match the pattern.
+    """
+    matched = re.match(r'https?://[^?#&]+/', url)
+    return matched.group()
+
+
+def urljoin(base, path):
+    """Resolve `path` against `base`.
+
+    Returns None when `path` is empty or not a string. Returns `path`
+    unchanged when it already starts with 'http://', 'https://' or '//'.
+    Returns None when `base` is not a string starting with one of those
+    prefixes. Otherwise returns the result of compat_urlparse.urljoin().
+    """
+    def _is_absolute(candidate):
+        return re.match(r'^(?:https?:)?//', candidate) is not None
+
+    if not (isinstance(path, compat_str) and path):
+        return None
+    if _is_absolute(path):
+        return path
+    if isinstance(base, compat_str) and _is_absolute(base):
+        return compat_urlparse.urljoin(base, path)
+    return None
+
+
  class HEADRequest(compat_urllib_request.Request):
      def get_method(self):
          return 'HEAD'
@@ -1688,7 +1773,7 @@ def parse_duration(s):
      s = s.strip()
  
      days, hours, mins, secs, ms = [None] * 5
-    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
+    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
      if m:
          days, hours, mins, secs, ms = m.groups()
      else:
@@ -1705,11 +1790,11 @@ def parse_duration(s):
                  )?
                  (?:
                      (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
-                )?$''', s)
+                )?Z?$''', s)
          if m:
              days, hours, mins, secs, ms = m.groups()
          else:
-            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
+            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
              if m:
                  hours, mins = m.groups()
              else:
@@ -1759,8 +1844,12 @@ def get_exe_version(exe, args=['--version'],
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
+        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+        # SIGTTOU if youtube-dl is run in the background.
+        # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
          out, _ = subprocess.Popen(
              [encodeArgument(exe)] + args,
+            stdin=subprocess.PIPE,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
@@ -2014,11 +2103,18 @@ def strip_jsonp(code):
  
  
  def js_to_json(code):
+    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+    INTEGER_TABLE = (
+        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+    )
+
      def fix_kv(m):
          v = m.group(0)
          if v in ('true', 'false', 'null'):
              return v
-        elif v.startswith('/*') or v == ',':
+        elif v.startswith('/*') or v.startswith('//') or v == ',':
              return ""
  
          if v[0] in ("'", '"'):
@@ -2029,15 +2125,10 @@ def js_to_json(code):
                  '\\x': '\\u00',
              }.get(m.group(0), m.group(0)), v[1:-1])
  
-        INTEGER_TABLE = (
-            (r'^0[xX][0-9a-fA-F]+', 16),
-            (r'^0+[0-7]+', 8),
-        )
-
          for regex, base in INTEGER_TABLE:
              im = re.match(regex, v)
              if im:
-                i = int(im.group(0), base)
+                i = int(im.group(1), base)
                  return '"%d":' % i if v.endswith(':') else '%d' % i
  
          return '"%s"' % v
@@ -2045,11 +2136,11 @@ def js_to_json(code):
      return re.sub(r'''(?sx)
          "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
          '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
-        /\*.*?\*/|,(?=\s*[\]}])|
+        {comment}|,(?={skip}[\]}}])|
          [a-zA-Z_][.a-zA-Z_0-9]*|
-        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
-        [0-9]+(?=\s*:)
-        ''', fix_kv, code)
+        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+        [0-9]+(?={skip}:)
+        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
  
  
  def qualities(quality_ids):
@@ -2123,7 +2214,7 @@ def mimetype2ext(mt):
          return ext
  
      _, _, res = mt.rpartition('/')
-    res = res.lower()
+    res = res.split(';')[0].strip().lower()
  
      return {
          '3gpp': '3gp',
@@ -2143,6 +2234,7 @@ def mimetype2ext(mt):
          'f4m+xml': 'f4m',
          'hds+xml': 'f4m',
          'vnd.ms-sstr+xml': 'ism',
+        'quicktime': 'mov',
      }.get(res, res)
  
  
@@ -2158,7 +2250,7 @@ def parse_codecs(codecs_str):
          if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
              if not vcodec:
                  vcodec = full_codec
-        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
+        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
              if not acodec:
                  acodec = full_codec
          else:
@@ -2281,11 +2373,18 @@ def _match_one(filter_part, dct):
      m = operator_rex.search(filter_part)
      if m:
          op = COMPARISON_OPERATORS[m.group('op')]
-        if m.group('strval') is not None:
+        actual_value = dct.get(m.group('key'))
+        if (m.group('strval') is not None or
+            # If the original field is a string and matching comparisonvalue is
+            # a number we should respect the origin of the original field
+            # and process comparison value as a string (see
+            # https://github.com/rg3/youtube-dl/issues/11082).
+            actual_value is not None and m.group('intval') is not None and
+                isinstance(actual_value, compat_str)):
              if m.group('op') not in ('=', '!='):
                  raise ValueError(
                      'Operator %s does not support string values!' % m.group('op'))
-            comparison_value = m.group('strval')
+            comparison_value = m.group('strval') or m.group('intval')
          else:
              try:
                  comparison_value = int(m.group('intval'))
@@ -2297,7 +2396,6 @@ def _match_one(filter_part, dct):
                      raise ValueError(
                          'Invalid integer value %r in filter part %r' % (
                              m.group('intval'), filter_part))
-        actual_value = dct.get(m.group('key'))
          if actual_value is None:
              return m.group('none_inclusive')
          return op(actual_value, comparison_value)
@@ -2959,9 +3057,7 @@ def encode_base_n(num, n, table=None):
  
  
  def decode_packed_codes(code):
-    mobj = re.search(
-        r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
-        code)
+    mobj = re.search(PACKED_CODES_RE, code)
      obfucasted_code, base, count, symbols = mobj.groups()
      base = int(base)
      count = int(count)
@@ -3096,3 +3192,87 @@ def decode_png(png_data):
              current_row.append(color)
  
      return width, height, pixels
+
+
+def write_xattr(path, key, value):
+    """Set the extended attribute `key` to `value` on the file at `path`.
+
+    Backends are tried in this order: the Python `xattr` module (either the
+    pyxattr or the xattr flavour); then, only if that import fails, NTFS
+    Alternate Data Streams when compat_os_name is 'nt', otherwise the
+    `setfattr` or `xattr` command-line tools.
+
+    `value` is handled as bytes: it is written to a binary-mode file on the
+    'nt' path and decoded as UTF-8 before being handed to the CLI tools.
+
+    Raises XAttrMetadataError when a backend reports a failure, and
+    XAttrUnavailableError when pyxattr is older than 0.5.0 or when no
+    backend can be found.
+    """
+    # This mess below finds the best xattr tool for the job
+    try:
+        # try the pyxattr module...
+        import xattr
+
+        # Both pyxattr and xattr install a module named `xattr`; only
+        # pyxattr exposes a top-level `set` function.
+        if hasattr(xattr, 'set'):  # pyxattr
+            # Unicode arguments are not supported in python-pyxattr until
+            # version 0.5.0
+            # See https://github.com/rg3/youtube-dl/issues/5498
+            pyxattr_required_version = '0.5.0'
+            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+                # TODO: fallback to CLI tools
+                # NOTE: this exception is not an ImportError, so it is not
+                # caught below and propagates to the caller; despite the
+                # message, no fallback takes place yet.
+                raise XAttrUnavailableError(
+                    'python-pyxattr is detected but is too old. '
+                    'youtube-dl requires %s or above while your version is %s. '
+                    'Falling back to other xattr implementations' % (
+                        pyxattr_required_version, xattr.__version__))
+
+            setxattr = xattr.set
+        else:  # xattr
+            setxattr = xattr.setxattr
+
+        try:
+            setxattr(path, key, value)
+        except EnvironmentError as e:
+            # Translate OS-level failures into the module's own error type.
+            raise XAttrMetadataError(e.errno, e.strerror)
+
+    except ImportError:
+        # No Python xattr module available: use a platform-specific fallback.
+        if compat_os_name == 'nt':
+            # Write xattrs to NTFS Alternate Data Streams:
+            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+            assert ':' not in key
+            assert os.path.exists(path)
+
+            # The stream is addressed as "<path>:<key>".
+            ads_fn = path + ':' + key
+            try:
+                with open(ads_fn, 'wb') as f:
+                    f.write(value)
+            except EnvironmentError as e:
+                raise XAttrMetadataError(e.errno, e.strerror)
+        else:
+            user_has_setfattr = check_executable('setfattr', ['--version'])
+            user_has_xattr = check_executable('xattr', ['-h'])
+
+            if user_has_setfattr or user_has_xattr:
+
+                # The CLI tools take the value as a text argument.
+                value = value.decode('utf-8')
+                # setfattr is preferred when both tools are present.
+                if user_has_setfattr:
+                    executable = 'setfattr'
+                    opts = ['-n', key, '-v', value]
+                elif user_has_xattr:
+                    executable = 'xattr'
+                    opts = ['-w', key, value]
+
+                cmd = ([encodeFilename(executable, True)] +
+                       [encodeArgument(o) for o in opts] +
+                       [encodeFilename(path, True)])
+
+                try:
+                    p = subprocess.Popen(
+                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+                except EnvironmentError as e:
+                    raise XAttrMetadataError(e.errno, e.strerror)
+                stdout, stderr = p.communicate()
+                stderr = stderr.decode('utf-8', 'replace')
+                if p.returncode != 0:
+                    # Here `code` is the exit status, not an errno, so
+                    # XAttrMetadataError must classify from the stderr text.
+                    raise XAttrMetadataError(p.returncode, stderr)
+
+            else:
+                # On Unix, and can't find pyxattr, setfattr, or xattr.
+                if sys.platform.startswith('linux'):
+                    raise XAttrUnavailableError(
+                        "Couldn't find a tool to set the xattrs. "
+                        "Install either the python 'pyxattr' or 'xattr' "
+                        "modules, or the GNU 'attr' package "
+                        "(which contains the 'setfattr' tool).")
+                else:
+                    raise XAttrUnavailableError(
+                        "Couldn't find a tool to set the xattrs. "
+                        "Install either the python 'xattr' module, "
+                        "or the 'xattr' binary.")