debian/control: Add recommends on aria2 | wget | curl to use external downloaders.

[youtubedl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 5be7cf99200c57639af17b1dc89f9a861658f331..8f5463f1c9a1e1a2660867abdc0f1f62e9147032 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -10,6 +10,7 @@ import ctypes
  import datetime
  import email.utils
  import errno
  import datetime
  import email.utils
  import errno
+import functools
  import gzip
  import itertools
  import io
  import gzip
  import itertools
  import io
@@ -31,16 +32,20 @@ import xml.etree.ElementTree
  import zlib
  
  from .compat import (
  import zlib
  
  from .compat import (
+    compat_basestring,
      compat_chr,
      compat_getenv,
      compat_html_entities,
      compat_chr,
      compat_getenv,
      compat_html_entities,
+    compat_http_client,
      compat_parse_qs,
      compat_parse_qs,
+    compat_socket_create_connection,
      compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
+    shlex_quote,
  )
  
  
  )
  
  
@@ -55,6 +60,7 @@ std_headers = {
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
+
  def preferredencoding():
      """Get preferred encoding.
  
  def preferredencoding():
      """Get preferred encoding.
  
@@ -129,13 +135,13 @@ if sys.version_info >= (2, 7):
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
-        expr = xpath + u"[@%s='%s']" % (key, val)
+        expr = xpath + "[@%s='%s']" % (key, val)
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
          # Here comes the crazy part: In 2.6, if the xpath is a unicode,
          # .//node does not match if a node is a direct child of . !
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
          # Here comes the crazy part: In 2.6, if the xpath is a unicode,
          # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, unicode):
+        if isinstance(xpath, compat_str):
              xpath = xpath.encode('ascii')
  
          for f in node.findall(xpath):
              xpath = xpath.encode('ascii')
  
          for f in node.findall(xpath):
@@ -145,6 +151,8 @@ else:
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
+
+
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
@@ -162,7 +170,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
@@ -201,6 +209,10 @@ def get_element_by_attribute(attribute, value, html):
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
+
+    if html is None:  # Convenience for sanitizing descriptions etc.
+        return html
+
      # Newline vs <br />
      html = html.replace('\n', ' ')
      html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
      # Newline vs <br />
      html = html.replace('\n', ' ')
      html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -236,9 +248,9 @@ def sanitize_open(filename, open_mode):
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
-                        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-                        for path_part in os.path.split(filename)
-                       )
+            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+            for path_part in os.path.split(filename)
+        )
          if alt_filename == filename:
              raise
          else:
          if alt_filename == filename:
              raise
          else:
@@ -255,6 +267,7 @@ def timeconvert(timestr):
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
+
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
@@ -275,6 +288,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
              return '_'
          return char
  
              return '_'
          return char
  
+    # Handle timestamps
+    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
      result = ''.join(map(replace_insane, s))
      if not is_id:
          while '__' in result:
      result = ''.join(map(replace_insane, s))
      if not is_id:
          while '__' in result:
@@ -287,6 +302,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
              result = '_'
      return result
  
              result = '_'
      return result
  
+
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
@@ -357,7 +373,7 @@ def encodeArgument(s):
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
-        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
@@ -371,6 +387,7 @@ def decodeOption(optval):
      assert isinstance(optval, compat_str)
      return optval
  
      assert isinstance(optval, compat_str)
      return optval
  
+
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
@@ -380,49 +397,34 @@ def formatSeconds(secs):
          return '%d' % secs
  
  
          return '%d' % secs
  
  
-def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
-    if sys.version_info < (3, 2):
-        import httplib
-
-        class HTTPSConnectionV3(httplib.HTTPSConnection):
-            def __init__(self, *args, **kwargs):
-                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
-
-            def connect(self):
-                sock = socket.create_connection((self.host, self.port), self.timeout)
-                if getattr(self, '_tunnel_host', False):
-                    self.sock = sock
-                    self._tunnel()
-                try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
-                except ssl.SSLError:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
-
-        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
-            def https_open(self, req):
-                return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3(**kwargs)
-    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
-        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
+def make_HTTPS_handler(params, **kwargs):
+    opts_no_check_certificate = params.get('nocheckcertificate', False)
+    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
+        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
          if opts_no_check_certificate:
          if opts_no_check_certificate:
+            context.check_hostname = False
              context.verify_mode = ssl.CERT_NONE
              context.verify_mode = ssl.CERT_NONE
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        try:
+            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+        except TypeError:
+            # Python 2.7.8
+            # (create_default_context present but HTTPSHandler has no context=)
+            pass
+
+    if sys.version_info < (3, 2):
+        return YoutubeDLHTTPSHandler(params, **kwargs)
      else:  # Python < 3.4
      else:  # Python < 3.4
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
-        try:
-            context.load_default_certs()
-        except AttributeError:
-            pass  # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
  
  
  class ExtractorError(Exception):
      """Error during info extraction."""
  
  
  class ExtractorError(Exception):
      """Error during info extraction."""
+
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
@@ -455,6 +457,13 @@ class ExtractorError(Exception):
          return ''.join(traceback.format_tb(self.traceback))
  
  
          return ''.join(traceback.format_tb(self.traceback))
  
  
+class UnsupportedError(ExtractorError):
+    def __init__(self, url):
+        super(UnsupportedError, self).__init__(
+            'Unsupported URL: %s' % url, expected=True)
+        self.url = url
+
+
  class RegexNotFoundError(ExtractorError):
      """Error when a regex didn't match"""
      pass
  class RegexNotFoundError(ExtractorError):
      """Error when a regex didn't match"""
      pass
@@ -467,6 +476,7 @@ class DownloadError(Exception):
      configured to continue on errors. They will contain the appropriate
      error message.
      """
      configured to continue on errors. They will contain the appropriate
      error message.
      """
+
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
@@ -488,9 +498,11 @@ class PostProcessingError(Exception):
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
+
      def __init__(self, msg):
          self.msg = msg
  
      def __init__(self, msg):
          self.msg = msg
  
+
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
@@ -520,6 +532,29 @@ class ContentTooShortError(Exception):
          self.downloaded = downloaded
          self.expected = expected
  
          self.downloaded = downloaded
          self.expected = expected
  
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+    hc = http_class(*args, **kwargs)
+    source_address = ydl_handler._params.get('source_address')
+    if source_address is not None:
+        sa = (source_address, 0)
+        if hasattr(hc, 'source_address'):  # Python 2.7+
+            hc.source_address = sa
+        else:  # Python 2.6
+            def _hc_connect(self, *args, **kwargs):
+                sock = compat_socket_create_connection(
+                    (self.host, self.port), self.timeout, sa)
+                if is_https:
+                    self.sock = ssl.wrap_socket(
+                        sock, self.key_file, self.cert_file,
+                        ssl_version=ssl.PROTOCOL_TLSv1)
+                else:
+                    self.sock = sock
+            hc.connect = functools.partial(_hc_connect, hc)
+
+    return hc
+
+
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
@@ -538,6 +573,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      public domain.
      """
  
      public domain.
      """
  
+    def __init__(self, params, *args, **kwargs):
+        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+        self._params = params
+
+    def http_open(self, req):
+        return self.do_open(functools.partial(
+            _create_http_connection, self, compat_http_client.HTTPConnection, False),
+            req)
+
      @staticmethod
      def deflate(data):
          try:
      @staticmethod
      def deflate(data):
          try:
@@ -555,17 +599,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
  
      def http_request(self, req):
          for h, v in std_headers.items():
  
      def http_request(self, req):
          for h, v in std_headers.items():
-            if h not in req.headers:
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # The dict keys are capitalized because of this bug by urllib
+            if h.capitalize() not in req.headers:
                  req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
                  req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
-        if 'Youtubedl-user-agent' in req.headers:
-            if 'User-agent' in req.headers:
-                del req.headers['User-agent']
-            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
-            del req.headers['Youtubedl-user-agent']
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
@@ -607,6 +648,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      https_response = http_response
  
  
      https_response = http_response
  
  
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+    def __init__(self, params, https_conn_class=None, *args, **kwargs):
+        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+        self._params = params
+
+    def https_open(self, req):
+        kwargs = {}
+        if hasattr(self, '_context'):  # python > 2.6
+            kwargs['context'] = self._context
+        if hasattr(self, '_check_hostname'):  # python 3.x
+            kwargs['check_hostname'] = self._check_hostname
+        return self.do_open(functools.partial(
+            _create_http_connection, self, self._https_conn_class, True),
+            req, **kwargs)
+
+
  def parse_iso8601(date_str, delimiter='T'):
      """ Return a UNIX timestamp from the given date """
  
  def parse_iso8601(date_str, delimiter='T'):
      """ Return a UNIX timestamp from the given date """
  
@@ -632,17 +690,19 @@ def parse_iso8601(date_str, delimiter='T'):
      return calendar.timegm(dt.timetuple())
  
  
      return calendar.timegm(dt.timetuple())
  
  
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
-
      upload_date = None
      upload_date = None
-    #Replace commas
+    # Replace commas
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
@@ -651,13 +711,10 @@ def unified_strdate(date_str):
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
+        '%Y %m %d',
          '%Y-%m-%d',
          '%Y/%m/%d',
          '%Y-%m-%d',
          '%Y/%m/%d',
-        '%d.%m.%Y',
-        '%d/%m/%Y',
-        '%d/%m/%y',
          '%Y/%m/%d %H:%M:%S',
          '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
@@ -669,6 +726,20 @@ def unified_strdate(date_str):
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
+    if day_first:
+        format_expressions.extend([
+            '%d.%m.%Y',
+            '%d/%m/%Y',
+            '%d/%m/%y',
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m.%d.%Y',
+            '%m/%d/%Y',
+            '%m/%d/%y',
+            '%m/%d/%Y %H:%M:%S',
+        ])
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -680,6 +751,7 @@ def unified_strdate(date_str):
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
+
  def determine_ext(url, default_ext='unknown_video'):
      if url is None:
          return default_ext
  def determine_ext(url, default_ext='unknown_video'):
      if url is None:
          return default_ext
@@ -689,16 +761,20 @@ def determine_ext(url, default_ext='unknown_video'):
      else:
          return default_ext
  
      else:
          return default_ext
  
+
  def subtitles_filename(filename, sub_lang, sub_format):
      return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
  
  def subtitles_filename(filename, sub_lang, sub_format):
      return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
  
+
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
          return today
          return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
@@ -706,7 +782,7 @@ def date_from_str(date_str):
          if sign == '-':
              time = -time
          unit = match.group('unit')
          if sign == '-':
              time = -time
          unit = match.group('unit')
-        #A bad aproximation?
+        # A bad approximation?
          if unit == 'month':
              unit = 'day'
              time *= 30
          if unit == 'month':
              unit = 'day'
              time *= 30
@@ -717,7 +793,8 @@ def date_from_str(date_str):
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
-    
+
+
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
@@ -727,8 +804,10 @@ def hyphenate_date(date_str):
      else:
          return date_str
  
      else:
          return date_str
  
+
  class DateRange(object):
      """Represents a time interval between two dates"""
  class DateRange(object):
      """Represents a time interval between two dates"""
+
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
@@ -741,17 +820,20 @@ class DateRange(object):
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
-        return cls(day,day)
+        return cls(day, day)
+
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
+
      def __str__(self):
      def __str__(self):
-        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
  
  
  def platform_name():
  
  
  def platform_name():
@@ -782,27 +864,30 @@ def _windows_write_string(s, out):
      except AttributeError:
          # If the output stream doesn't have a fileno, it's virtual
          return False
      except AttributeError:
          # If the output stream doesn't have a fileno, it's virtual
          return False
+    except io.UnsupportedOperation:
+        # Some strange Windows pseudo files?
+        return False
      if fileno not in WIN_OUTPUT_IDS:
          return False
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
      if fileno not in WIN_OUTPUT_IDS:
          return False
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
-        ("GetStdHandle", ctypes.windll.kernel32))
+        (b"GetStdHandle", ctypes.windll.kernel32))
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
-        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
      written = ctypes.wintypes.DWORD(0)
  
      written = ctypes.wintypes.DWORD(0)
  
-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
-        ("GetConsoleMode", ctypes.windll.kernel32))
+        (b"GetConsoleMode", ctypes.windll.kernel32))
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
@@ -1003,7 +1088,7 @@ def smuggle_url(url, data):
  
  
  def unsmuggle_url(smug_url, default=None):
  
  
  def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
          return smug_url, default
      url, _, sdata = smug_url.rpartition('#')
      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
          return smug_url, default
      url, _, sdata = smug_url.rpartition('#')
      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
@@ -1025,6 +1110,60 @@ def format_bytes(bytes):
      return '%.2f%s' % (converted, suffix)
  
  
      return '%.2f%s' % (converted, suffix)
  
  
+def parse_filesize(s):
+    if s is None:
+        return None
+
+    # The lower-case forms are of course incorrect and unofficial,
+    # but we support those too
+    _UNIT_TABLE = {
+        'B': 1,
+        'b': 1,
+        'KiB': 1024,
+        'KB': 1000,
+        'kB': 1024,
+        'Kb': 1000,
+        'MiB': 1024 ** 2,
+        'MB': 1000 ** 2,
+        'mB': 1024 ** 2,
+        'Mb': 1000 ** 2,
+        'GiB': 1024 ** 3,
+        'GB': 1000 ** 3,
+        'gB': 1024 ** 3,
+        'Gb': 1000 ** 3,
+        'TiB': 1024 ** 4,
+        'TB': 1000 ** 4,
+        'tB': 1024 ** 4,
+        'Tb': 1000 ** 4,
+        'PiB': 1024 ** 5,
+        'PB': 1000 ** 5,
+        'pB': 1024 ** 5,
+        'Pb': 1000 ** 5,
+        'EiB': 1024 ** 6,
+        'EB': 1000 ** 6,
+        'eB': 1024 ** 6,
+        'Eb': 1000 ** 6,
+        'ZiB': 1024 ** 7,
+        'ZB': 1000 ** 7,
+        'zB': 1024 ** 7,
+        'Zb': 1000 ** 7,
+        'YiB': 1024 ** 8,
+        'YB': 1000 ** 8,
+        'yB': 1024 ** 8,
+        'Yb': 1000 ** 8,
+    }
+
+    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    if not m:
+        return None
+
+    num_str = m.group('num').replace(',', '.')
+    mult = _UNIT_TABLE[m.group('unit')]
+    return int(float(num_str) * mult)
+
+
  def get_term_width():
      columns = compat_getenv('COLUMNS', None)
      if columns:
  def get_term_width():
      columns = compat_getenv('COLUMNS', None)
      if columns:
@@ -1124,32 +1263,48 @@ def float_or_none(v, scale=1, invscale=1, default=None):
  
  
  def parse_duration(s):
  
  
  def parse_duration(s):
-    if s is None:
+    if not isinstance(s, compat_basestring):
          return None
  
      s = s.strip()
  
      m = re.match(
          return None
  
      s = s.strip()
  
      m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
              (?:
              (?:
-                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+                (?:
+                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
+                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+                )?
                  (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
              )?
                  (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
              )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
      if not m:
          return None
      if not m:
          return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
      if m.group('mins'):
          res += int(m.group('mins')) * 60
      if m.group('mins'):
          res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
+    if m.group('days'):
+        res += int(m.group('days')) * 24 * 60 * 60
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
-    name, real_ext = os.path.splitext(filename) 
+    name, real_ext = os.path.splitext(filename)
      return '{0}.{1}{2}'.format(name, ext, real_ext)
  
  
      return '{0}.{1}{2}'.format(name, ext, real_ext)
  
  
@@ -1164,18 +1319,25 @@ def check_executable(exe, args=[]):
  
  
  def get_exe_version(exe, args=['--version'],
  
  
  def get_exe_version(exe, args=['--version'],
-                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
-                    unrecognized='present'):
+                    version_re=None, unrecognized='present'):
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
-        out, err = subprocess.Popen(
+        out, _ = subprocess.Popen(
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(version_re, firstline)
+    if isinstance(out, bytes):  # Python 2.x
+        out = out.decode('ascii', 'ignore')
+    return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+    assert isinstance(output, compat_str)
+    if version_re is None:
+        version_re = r'version\s+([-0-9._a-zA-Z]+)'
+    m = re.search(version_re, output)
      if m:
          return m.group(1)
      else:
      if m:
          return m.group(1)
      else:
@@ -1270,7 +1432,7 @@ def uppercase_escape(s):
  
  def escape_rfc3986(s):
      """Escape non-ASCII characters as suggested by RFC 3986"""
  
  def escape_rfc3986(s):
      """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, unicode):
+    if sys.version_info < (3, 0) and isinstance(s, compat_str):
          s = s.encode('utf-8')
      return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
  
          s = s.encode('utf-8')
      return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
  
@@ -1386,7 +1548,7 @@ def js_to_json(code):
      res = re.sub(r'''(?x)
          "(?:[^"\\]*(?:\\\\|\\")?)*"|
          '(?:[^'\\]*(?:\\\\|\\')?)*'|
      res = re.sub(r'''(?x)
          "(?:[^"\\]*(?:\\\\|\\")?)*"|
          '(?:[^'\\]*(?:\\\\|\\')?)*'|
-        [a-zA-Z_][a-zA-Z_0-9]*
+        [a-zA-Z_][.a-zA-Z_0-9]*
          ''', fix_kv, code)
      res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
      return res
          ''', fix_kv, code)
      res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
      return res
@@ -1416,7 +1578,7 @@ def limit_length(s, length):
  
  
  def version_tuple(v):
  
  
  def version_tuple(v):
-    return [int(e) for e in v.split('.')]
+    return tuple(int(e) for e in re.split(r'[-.]', v))
  
  
  def is_outdated_version(version, limit, assume_new=True):
  
  
  def is_outdated_version(version, limit, assume_new=True):
@@ -1433,3 +1595,86 @@ def ytdl_is_updateable():
      from zipimport import zipimporter
  
      return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
      from zipimport import zipimporter
  
      return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+    """ Get a short string representation for a subprocess command """
+    quoted_args = [shlex_quote(arg) for arg in args]
+    return ' '.join(quoted_args)
+
+
+def urlhandle_detect_ext(url_handle):
+    """ Guess a file extension from the headers of an opened URL handle.
+
+    The extension of the filename given in an attachment Content-Disposition
+    header is preferred; otherwise the subtype of the Content-Type header is
+    used, with any parameters (e.g. "; charset=...") removed.
+    Returns None if neither header yields an extension.
+    """
+    try:
+        headers = url_handle.headers
+    except AttributeError:  # Python < 3
+        getheader = url_handle.info().getheader
+    else:
+        def getheader(h):
+            return headers[h]
+
+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
+    content_type = getheader('Content-Type')
+    if not content_type:
+        # Header absent or empty: nothing to derive an extension from
+        return None
+    # Only the "type/subtype" part is relevant; drop media type parameters
+    mime_type = content_type.partition(';')[0].strip()
+    _, slash, subtype = mime_type.partition('/')
+    if not slash or not subtype:
+        return None
+    return subtype
+
+
+def age_restricted(content_limit, age_limit):
+    """ Returns True iff the content should be blocked """
+
+    # Either no limit is configured (age_limit is None) or the content
+    # carries no restriction (content_limit is None): never block.
+    if age_limit is None or content_limit is None:
+        return False
+    return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+
+    # Longer BOMs are listed before their prefixes (UTF-32-LE starts with
+    # the UTF-16-LE BOM), so the first match is the right one.
+    BOMS = [
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    # Without a BOM the data is decoded as UTF-8 from the very first byte
+    encoding, skip = 'utf-8', 0
+    for marker, marker_encoding in BOMS:
+        if first_bytes.startswith(marker):
+            encoding, skip = marker_encoding, len(marker)
+            break
+
+    text = first_bytes[skip:].decode(encoding, 'replace')
+    # The match object (or None) is returned as-is; callers use its truthiness
+    return re.match(r'^\s*<', text)
+
+
+def determine_protocol(info_dict):
+    """ Determine the download protocol for the given info dict.
+
+    An explicit 'protocol' entry wins; otherwise the protocol is derived
+    from the start of the URL, then its extension, then its URL scheme.
+    """
+    explicit = info_dict.get('protocol')
+    if explicit is not None:
+        return explicit
+
+    url = info_dict['url']
+    # Checked in this order; the first matching URL prefix is returned
+    for prefix in ('rtmp', 'mms', 'rtsp'):
+        if url.startswith(prefix):
+            return prefix
+
+    # Manifest formats are recognised by extension
+    ext = determine_ext(url)
+    if ext in ('m3u8', 'f4m'):
+        return ext
+
+    return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+    """ Render a list of rows, each as a list of values """
+    rows = [header_row] + data
+    # Width of each column: the longest stringified cell it contains
+    widths = [
+        max(len(compat_str(cell)) for cell in column)
+        for column in zip(*rows)]
+    # Every column but the last is left-aligned and padded one character
+    # beyond its width; the last column is emitted unpadded.
+    padded_specs = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
+    row_format = ' '.join(padded_specs) + '%s'
+    return '\n'.join(row_format % tuple(row) for row in rows)