New upstream version 2017.03.26

[youtubedl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 17b83794a2becf272005305442094223cd054638..2340bc306b69652d43246472e7fd3f5e819e2af2 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -39,6 +39,7 @@ from .compat import (
      compat_basestring,
      compat_chr,
      compat_etree_fromstring,
+    compat_expanduser,
      compat_html_entities,
      compat_html_entities_html5,
      compat_http_client,
@@ -473,7 +474,8 @@ def timeconvert(timestr):
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
-    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
+    Set is_id if this is not an arbitrary string, but an ID that should be kept
+    if possible.
      """
      def replace_insane(char):
          if restricted and char in ACCENT_CHARS:
@@ -538,6 +540,11 @@ def sanitized_Request(url, *args, **kwargs):
      return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
  
  
+def expand_path(s):
+    """Expand shell variables and ~"""
+    return os.path.expandvars(compat_expanduser(s))
+
+
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
@@ -1747,11 +1754,16 @@ def base_url(url):
  
  
  def urljoin(base, path):
+    if isinstance(path, bytes):
+        path = path.decode('utf-8')
      if not isinstance(path, compat_str) or not path:
          return None
      if re.match(r'^(?:https?:)?//', path):
          return path
-    if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
+    if isinstance(base, bytes):
+        base = base.decode('utf-8')
+    if not isinstance(base, compat_str) or not re.match(
+            r'^(?:https?:)?//', base):
          return None
      return compat_urlparse.urljoin(base, path)
  
@@ -3319,6 +3331,57 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
              self, req, proxy, type)
  
  
+# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+# released into Public Domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
+def long_to_bytes(n, blocksize=0):
+    """long_to_bytes(n:long, blocksize:int) : string
+    Convert a long integer to a byte string.
+
+    If optional blocksize is given and greater than zero, pad the front of the
+    byte string with binary zeros so that the length is a multiple of
+    blocksize.
+    """
+    # after much testing, this algorithm was deemed to be the fastest
+    s = b''
+    n = int(n)
+    while n > 0:
+        s = compat_struct_pack('>I', n & 0xffffffff) + s
+        n = n >> 32
+    # strip off leading zeros
+    for i in range(len(s)):
+        if s[i] != b'\000'[0]:
+            break
+    else:
+        # only happens when n == 0
+        s = b'\000'
+        i = 0
+    s = s[i:]
+    # add back some pad bytes.  this could be done more efficiently w.r.t. the
+    # de-padding being done above, but sigh...
+    if blocksize > 0 and len(s) % blocksize:
+        s = (blocksize - len(s) % blocksize) * b'\000' + s
+    return s
+
+
+def bytes_to_long(s):
+    """bytes_to_long(string) : long
+    Convert a byte string to a long integer.
+
+    This is (essentially) the inverse of long_to_bytes().
+    """
+    acc = 0
+    length = len(s)
+    if length % 4:
+        extra = (4 - length % 4)
+        s = b'\000' * extra + s
+        length = length + extra
+    for i in range(0, length, 4):
+        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+    return acc
+
+
  def ohdave_rsa_encrypt(data, exponent, modulus):
      '''
      Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
@@ -3336,6 +3399,21 @@ def ohdave_rsa_encrypt(data, exponent, modulus):
      return '%x' % encrypted
  
  
+def pkcs1pad(data, length):
+    """
+    Padding input data with PKCS#1 scheme
+
+    @param {int[]} data        input data
+    @param {int}   length      target length
+    @returns {int[]}           padded data
+    """
+    if len(data) > length - 11:
+        raise ValueError('Input data too long for PKCS#1 padding')
+
+    pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
+    return [0, 2] + pseudo_random + [0] + data
+
+
  def encode_base_n(num, n, table=None):
      FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
      if not table: