Merge tag 'upstream/2015.02.28'

[youtubedl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 5be7cf99200c57639af17b1dc89f9a861658f331..1f3bfef7d562e3fb0e63db16e644b86819eaaa5c 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -10,12 +10,14 @@ import ctypes
  import datetime
  import email.utils
  import errno
  import datetime
  import email.utils
  import errno
+import functools
  import gzip
  import itertools
  import io
  import json
  import locale
  import math
  import gzip
  import itertools
  import io
  import json
  import locale
  import math
+import operator
  import os
  import pipes
  import platform
  import os
  import pipes
  import platform
@@ -31,16 +33,20 @@ import xml.etree.ElementTree
  import zlib
  
  from .compat import (
  import zlib
  
  from .compat import (
+    compat_basestring,
      compat_chr,
      compat_getenv,
      compat_html_entities,
      compat_chr,
      compat_getenv,
      compat_html_entities,
+    compat_http_client,
      compat_parse_qs,
      compat_parse_qs,
+    compat_socket_create_connection,
      compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
+    shlex_quote,
  )
  
  
  )
  
  
@@ -48,13 +54,19 @@ from .compat import (
  compiled_regex_type = type(re.compile(''))
  
  std_headers = {
  compiled_regex_type = type(re.compile(''))
  
  std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Encoding': 'gzip, deflate',
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Encoding': 'gzip, deflate',
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
+
+# Full English month names, January through December, in calendar order.
+ENGLISH_MONTH_NAMES = [
+    'January', 'February', 'March', 'April', 'May', 'June',
+    'July', 'August', 'September', 'October', 'November', 'December']
+
+
  def preferredencoding():
      """Get preferred encoding.
  
  def preferredencoding():
      """Get preferred encoding.
  
@@ -129,13 +141,13 @@ if sys.version_info >= (2, 7):
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
-        expr = xpath + u"[@%s='%s']" % (key, val)
+        expr = xpath + "[@%s='%s']" % (key, val)
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
          # Here comes the crazy part: In 2.6, if the xpath is a unicode,
          # .//node does not match if a node is a direct child of . !
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
          # Here comes the crazy part: In 2.6, if the xpath is a unicode,
          # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, unicode):
+        if isinstance(xpath, compat_str):
              xpath = xpath.encode('ascii')
  
          for f in node.findall(xpath):
              xpath = xpath.encode('ascii')
  
          for f in node.findall(xpath):
@@ -145,6 +157,8 @@ else:
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
+
+
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
@@ -162,7 +176,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
@@ -201,6 +215,10 @@ def get_element_by_attribute(attribute, value, html):
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
+
+    if html is None:  # Convenience for sanitizing descriptions etc.
+        return html
+
      # Newline vs <br />
      html = html.replace('\n', ' ')
      html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
      # Newline vs <br />
      html = html.replace('\n', ' ')
      html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -236,9 +254,9 @@ def sanitize_open(filename, open_mode):
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
-                        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-                        for path_part in os.path.split(filename)
-                       )
+            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+            for path_part in os.path.split(filename)
+        )
          if alt_filename == filename:
              raise
          else:
          if alt_filename == filename:
              raise
          else:
@@ -255,6 +273,7 @@ def timeconvert(timestr):
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
+
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
@@ -275,6 +294,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
              return '_'
          return char
  
              return '_'
          return char
  
+    # Handle timestamps
+    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
      result = ''.join(map(replace_insane, s))
      if not is_id:
          while '__' in result:
      result = ''.join(map(replace_insane, s))
      if not is_id:
          while '__' in result:
@@ -283,10 +304,13 @@ def sanitize_filename(s, restricted=False, is_id=False):
          # Common case of "Foreign band name - English song title"
          if restricted and result.startswith('-_'):
              result = result[2:]
          # Common case of "Foreign band name - English song title"
          if restricted and result.startswith('-_'):
              result = result[2:]
+        if result.startswith('-'):
+            result = '_' + result[len('-'):]
          if not result:
              result = '_'
      return result
  
          if not result:
              result = '_'
      return result
  
+
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
@@ -357,7 +381,7 @@ def encodeArgument(s):
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
-        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
@@ -371,6 +395,7 @@ def decodeOption(optval):
      assert isinstance(optval, compat_str)
      return optval
  
      assert isinstance(optval, compat_str)
      return optval
  
+
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
@@ -380,49 +405,34 @@ def formatSeconds(secs):
          return '%d' % secs
  
  
          return '%d' % secs
  
  
-def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
-    if sys.version_info < (3, 2):
-        import httplib
-
-        class HTTPSConnectionV3(httplib.HTTPSConnection):
-            def __init__(self, *args, **kwargs):
-                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
-
-            def connect(self):
-                sock = socket.create_connection((self.host, self.port), self.timeout)
-                if getattr(self, '_tunnel_host', False):
-                    self.sock = sock
-                    self._tunnel()
-                try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
-                except ssl.SSLError:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
-
-        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
-            def https_open(self, req):
-                return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3(**kwargs)
-    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
-        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
+def make_HTTPS_handler(params, **kwargs):
+    opts_no_check_certificate = params.get('nocheckcertificate', False)
+    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
+        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
          if opts_no_check_certificate:
          if opts_no_check_certificate:
+            context.check_hostname = False
              context.verify_mode = ssl.CERT_NONE
              context.verify_mode = ssl.CERT_NONE
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        try:
+            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+        except TypeError:
+            # Python 2.7.8
+            # (create_default_context present but HTTPSHandler has no context=)
+            pass
+
+    if sys.version_info < (3, 2):
+        return YoutubeDLHTTPSHandler(params, **kwargs)
      else:  # Python < 3.4
      else:  # Python < 3.4
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
-        try:
-            context.load_default_certs()
-        except AttributeError:
-            pass  # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
  
  
  class ExtractorError(Exception):
      """Error during info extraction."""
  
  
  class ExtractorError(Exception):
      """Error during info extraction."""
+
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
@@ -455,6 +465,13 @@ class ExtractorError(Exception):
          return ''.join(traceback.format_tb(self.traceback))
  
  
          return ''.join(traceback.format_tb(self.traceback))
  
  
+class UnsupportedError(ExtractorError):
+    """ExtractorError carrying the message 'Unsupported URL: <url>'.
+
+    The error is created with expected=True (per ExtractorError, a normal
+    error message and most likely not a bug), and the offending URL is
+    kept on the instance as ``self.url``.
+    """
+
+    def __init__(self, url):
+        super(UnsupportedError, self).__init__(
+            'Unsupported URL: %s' % url, expected=True)
+        self.url = url
+
+
  class RegexNotFoundError(ExtractorError):
      """Error when a regex didn't match"""
      pass
  class RegexNotFoundError(ExtractorError):
      """Error when a regex didn't match"""
      pass
@@ -467,6 +484,7 @@ class DownloadError(Exception):
      configured to continue on errors. They will contain the appropriate
      error message.
      """
      configured to continue on errors. They will contain the appropriate
      error message.
      """
+
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
@@ -488,9 +506,11 @@ class PostProcessingError(Exception):
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
+
      def __init__(self, msg):
          self.msg = msg
  
      def __init__(self, msg):
          self.msg = msg
  
+
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
@@ -520,6 +540,29 @@ class ContentTooShortError(Exception):
          self.downloaded = downloaded
          self.expected = expected
  
          self.downloaded = downloaded
          self.expected = expected
  
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+    """Instantiate http_class and apply the handler's 'source_address' option.
+
+    ydl_handler -- handler object; its ``_params`` mapping is read for the
+                   'source_address' key
+    http_class  -- connection class, called as http_class(*args, **kwargs)
+    is_https    -- when true, the replacement connect() below wraps the
+                   socket with ssl.wrap_socket
+
+    Returns the connection object. It is left unmodified when
+    'source_address' is missing or None.
+    """
+    hc = http_class(*args, **kwargs)
+    source_address = ydl_handler._params.get('source_address')
+    if source_address is not None:
+        # (host, port) pair; port 0 presumably leaves the choice of the
+        # local port to the OS -- NOTE(review): confirm
+        sa = (source_address, 0)
+        if hasattr(hc, 'source_address'):  # Python 2.7+
+            hc.source_address = sa
+        else:  # Python 2.6
+            # The connection object has no source_address attribute here,
+            # so connect() is replaced by a version that hands the source
+            # address to compat_socket_create_connection directly.
+            def _hc_connect(self, *args, **kwargs):
+                sock = compat_socket_create_connection(
+                    (self.host, self.port), self.timeout, sa)
+                if is_https:
+                    self.sock = ssl.wrap_socket(
+                        sock, self.key_file, self.cert_file,
+                        ssl_version=ssl.PROTOCOL_TLSv1)
+                else:
+                    self.sock = sock
+            # Bind hc as `self`; the function is set on the instance, not
+            # on the class.
+            hc.connect = functools.partial(_hc_connect, hc)
+
+    return hc
+
+
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
@@ -538,6 +581,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      public domain.
      """
  
      public domain.
      """
  
+    def __init__(self, params, *args, **kwargs):
+        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+        self._params = params
+
+    def http_open(self, req):
+        return self.do_open(functools.partial(
+            _create_http_connection, self, compat_http_client.HTTPConnection, False),
+            req)
+
      @staticmethod
      def deflate(data):
          try:
      @staticmethod
      def deflate(data):
          try:
@@ -555,17 +607,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
  
      def http_request(self, req):
          for h, v in std_headers.items():
  
      def http_request(self, req):
          for h, v in std_headers.items():
-            if h not in req.headers:
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # The dict keys are capitalized because of this bug by urllib
+            if h.capitalize() not in req.headers:
                  req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
                  req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
-        if 'Youtubedl-user-agent' in req.headers:
-            if 'User-agent' in req.headers:
-                del req.headers['User-agent']
-            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
-            del req.headers['Youtubedl-user-agent']
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
@@ -607,42 +656,62 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      https_response = http_response
  
  
      https_response = http_response
  
  
-def parse_iso8601(date_str, delimiter='T'):
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+    """HTTPS handler that opens its connections via _create_http_connection.
+
+    params           -- stored as ``self._params``; _create_http_connection
+                        reads its 'source_address' entry
+    https_conn_class -- connection class to instantiate; falls back to
+                        compat_http_client.HTTPSConnection when falsy
+
+    Note that https_conn_class is the second positional parameter; any
+    further positional and keyword arguments are forwarded to
+    HTTPSHandler.__init__.
+    """
+
+    def __init__(self, params, https_conn_class=None, *args, **kwargs):
+        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+        self._params = params
+
+    def https_open(self, req):
+        # Forward only the attributes that exist on this instance, so that
+        # do_open is not given keyword arguments the base class never set.
+        kwargs = {}
+        if hasattr(self, '_context'):  # python > 2.6
+            kwargs['context'] = self._context
+        if hasattr(self, '_check_hostname'):  # python 3.x
+            kwargs['check_hostname'] = self._check_hostname
+        # The partial pre-binds (handler, connection class, is_https=True);
+        # do_open supplies the remaining connection arguments.
+        return self.do_open(functools.partial(
+            _create_http_connection, self, self._https_conn_class, True),
+            req, **kwargs)
+
+
+def parse_iso8601(date_str, delimiter='T', timezone=None):
      """ Return a UNIX timestamp from the given date """
  
      if date_str is None:
          return None
  
      """ Return a UNIX timestamp from the given date """
  
      if date_str is None:
          return None
  
-    m = re.search(
-        r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
-        date_str)
-    if not m:
-        timezone = datetime.timedelta()
-    else:
-        date_str = date_str[:-len(m.group(0))]
-        if not m.group('sign'):
+    if timezone is None:
+        m = re.search(
+            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+            date_str)
+        if not m:
              timezone = datetime.timedelta()
          else:
              timezone = datetime.timedelta()
          else:
-            sign = 1 if m.group('sign') == '+' else -1
-            timezone = datetime.timedelta(
-                hours=sign * int(m.group('hours')),
-                minutes=sign * int(m.group('minutes')))
+            date_str = date_str[:-len(m.group(0))]
+            if not m.group('sign'):
+                timezone = datetime.timedelta()
+            else:
+                sign = 1 if m.group('sign') == '+' else -1
+                timezone = datetime.timedelta(
+                    hours=sign * int(m.group('hours')),
+                    minutes=sign * int(m.group('minutes')))
      date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
      dt = datetime.datetime.strptime(date_str, date_format) - timezone
      return calendar.timegm(dt.timetuple())
  
  
      date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
      dt = datetime.datetime.strptime(date_str, date_format) - timezone
      return calendar.timegm(dt.timetuple())
  
  
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
-
      upload_date = None
      upload_date = None
-    #Replace commas
+    # Replace commas
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
@@ -651,13 +720,10 @@ def unified_strdate(date_str):
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
+        '%Y %m %d',
          '%Y-%m-%d',
          '%Y/%m/%d',
          '%Y-%m-%d',
          '%Y/%m/%d',
-        '%d.%m.%Y',
-        '%d/%m/%Y',
-        '%d/%m/%y',
          '%Y/%m/%d %H:%M:%S',
          '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
@@ -669,6 +735,20 @@ def unified_strdate(date_str):
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
+    if day_first:
+        format_expressions.extend([
+            '%d.%m.%Y',
+            '%d/%m/%Y',
+            '%d/%m/%y',
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m.%d.%Y',
+            '%m/%d/%Y',
+            '%m/%d/%y',
+            '%m/%d/%Y %H:%M:%S',
+        ])
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -680,6 +760,7 @@ def unified_strdate(date_str):
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
+
  def determine_ext(url, default_ext='unknown_video'):
      if url is None:
          return default_ext
  def determine_ext(url, default_ext='unknown_video'):
      if url is None:
          return default_ext
@@ -689,16 +770,20 @@ def determine_ext(url, default_ext='unknown_video'):
      else:
          return default_ext
  
      else:
          return default_ext
  
+
  def subtitles_filename(filename, sub_lang, sub_format):
      return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
  
  def subtitles_filename(filename, sub_lang, sub_format):
      return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
  
+
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
          return today
          return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
@@ -706,7 +791,7 @@ def date_from_str(date_str):
          if sign == '-':
              time = -time
          unit = match.group('unit')
          if sign == '-':
              time = -time
          unit = match.group('unit')
-        #A bad aproximation?
+        # A bad aproximation?
          if unit == 'month':
              unit = 'day'
              time *= 30
          if unit == 'month':
              unit = 'day'
              time *= 30
@@ -717,7 +802,8 @@ def date_from_str(date_str):
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
-    
+
+
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
@@ -727,8 +813,10 @@ def hyphenate_date(date_str):
      else:
          return date_str
  
      else:
          return date_str
  
+
  class DateRange(object):
      """Represents a time interval between two dates"""
  class DateRange(object):
      """Represents a time interval between two dates"""
+
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
@@ -741,17 +829,20 @@ class DateRange(object):
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
-        return cls(day,day)
+        return cls(day, day)
+
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
+
      def __str__(self):
      def __str__(self):
-        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
  
  
  def platform_name():
  
  
  def platform_name():
@@ -782,34 +873,37 @@ def _windows_write_string(s, out):
      except AttributeError:
          # If the output stream doesn't have a fileno, it's virtual
          return False
      except AttributeError:
          # If the output stream doesn't have a fileno, it's virtual
          return False
+    except io.UnsupportedOperation:
+        # Some strange Windows pseudo files?
+        return False
      if fileno not in WIN_OUTPUT_IDS:
          return False
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
      if fileno not in WIN_OUTPUT_IDS:
          return False
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
-        ("GetStdHandle", ctypes.windll.kernel32))
+        (b"GetStdHandle", ctypes.windll.kernel32))
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
-        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
      written = ctypes.wintypes.DWORD(0)
  
      written = ctypes.wintypes.DWORD(0)
  
-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
-        ("GetConsoleMode", ctypes.windll.kernel32))
+        (b"GetConsoleMode", ctypes.windll.kernel32))
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
          if handle == INVALID_HANDLE_VALUE or handle is None:
              return True
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
          if handle == INVALID_HANDLE_VALUE or handle is None:
              return True
-        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
-                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
+                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
  
      if not_a_console(h):
          return False
  
      if not_a_console(h):
          return False
@@ -1003,7 +1097,7 @@ def smuggle_url(url, data):
  
  
  def unsmuggle_url(smug_url, default=None):
  
  
  def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
          return smug_url, default
      url, _, sdata = smug_url.rpartition('#')
      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
          return smug_url, default
      url, _, sdata = smug_url.rpartition('#')
      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
@@ -1025,6 +1119,60 @@ def format_bytes(bytes):
      return '%.2f%s' % (converted, suffix)
  
  
      return '%.2f%s' % (converted, suffix)
  
  
def parse_filesize(s):
    """ Parse a human-readable file size such as '1.5 MiB' into bytes.

    The string must start with a decimal number (',' or '.' is accepted as
    the decimal separator), optionally followed by whitespace, then one of
    the units in the table below. Anything after the unit is ignored.

    Returns the size as an integer number of bytes (fractions of a byte are
    truncated), or None if s is None or does not look like a file size.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
    }

    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if not m:
        return None

    mult = _UNIT_TABLE[m.group('unit')]
    # Work in exact integer arithmetic: going through float() loses
    # precision (float('1.005') * 1000 is 1004.9999999999999, which would
    # truncate to 1004) and cannot represent the largest units exactly.
    whole, _, fraction = m.group('num').replace(',', '.').partition('.')
    scale = 10 ** len(fraction)
    scaled_num = int(whole) * scale + int(fraction or '0')
    return scaled_num * mult // scale
+
+
  def get_term_width():
      columns = compat_getenv('COLUMNS', None)
      if columns:
  def get_term_width():
      columns = compat_getenv('COLUMNS', None)
      if columns:
@@ -1044,11 +1192,18 @@ def get_term_width():
  def month_by_name(name):
      """ Return the number of a month by (locale-independently) English name """
  
  def month_by_name(name):
      """ Return the number of a month by (locale-independently) English name """
  
-    ENGLISH_NAMES = [
-        'January', 'February', 'March', 'April', 'May', 'June',
-        'July', 'August', 'September', 'October', 'November', 'December']
      try:
      try:
-        return ENGLISH_NAMES.index(name) + 1
+        return ENGLISH_MONTH_NAMES.index(name) + 1
+    except ValueError:
+        return None
+
+
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of the month whose English name starts
        with the given three-letter abbreviation, or None if there is no
        such month. The lookup is locale-independent and case-sensitive. """

    abbreviations = [month_name[:3] for month_name in ENGLISH_MONTH_NAMES]
    if abbrev in abbreviations:
        return abbreviations.index(abbrev) + 1
    return None
  
      except ValueError:
          return None
  
@@ -1124,32 +1279,53 @@ def float_or_none(v, scale=1, invscale=1, default=None):
  
  
  def parse_duration(s):
  
  
  def parse_duration(s):
-    if s is None:
+    if not isinstance(s, compat_basestring):
          return None
  
      s = s.strip()
  
      m = re.match(
          return None
  
      s = s.strip()
  
      m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
+            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
              (?:
              (?:
-                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+                (?:
+                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
+                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+                )?
                  (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
              )?
                  (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
              )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
      if not m:
          return None
      if not m:
          return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
+    if m.group('mins_reversed'):
+        res += int(m.group('mins_reversed')) * 60
      if m.group('mins'):
          res += int(m.group('mins')) * 60
      if m.group('mins'):
          res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
+    if m.group('hours_reversed'):
+        res += int(m.group('hours_reversed')) * 60 * 60
+    if m.group('days'):
+        res += int(m.group('days')) * 24 * 60 * 60
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
-    name, real_ext = os.path.splitext(filename) 
+    name, real_ext = os.path.splitext(filename)
      return '{0}.{1}{2}'.format(name, ext, real_ext)
  
  
      return '{0}.{1}{2}'.format(name, ext, real_ext)
  
  
@@ -1164,18 +1340,25 @@ def check_executable(exe, args=[]):
  
  
  def get_exe_version(exe, args=['--version'],
  
  
  def get_exe_version(exe, args=['--version'],
-                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
-                    unrecognized='present'):
+                    version_re=None, unrecognized='present'):
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
-        out, err = subprocess.Popen(
+        out, _ = subprocess.Popen(
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(version_re, firstline)
+    if isinstance(out, bytes):  # Python 2.x
+        out = out.decode('ascii', 'ignore')
+    return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+    assert isinstance(output, compat_str)
+    if version_re is None:
+        version_re = r'version\s+([-0-9._a-zA-Z]+)'
+    m = re.search(version_re, output)
      if m:
          return m.group(1)
      else:
      if m:
          return m.group(1)
      else:
@@ -1270,7 +1453,7 @@ def uppercase_escape(s):
  
  def escape_rfc3986(s):
      """Escape non-ASCII characters as suggested by RFC 3986"""
  
  def escape_rfc3986(s):
      """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, unicode):
+    if sys.version_info < (3, 0) and isinstance(s, compat_str):
          s = s.encode('utf-8')
      return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
  
          s = s.encode('utf-8')
      return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
  
@@ -1384,9 +1567,9 @@ def js_to_json(code):
          return '"%s"' % v
  
      res = re.sub(r'''(?x)
          return '"%s"' % v
  
      res = re.sub(r'''(?x)
-        "(?:[^"\\]*(?:\\\\|\\")?)*"|
-        '(?:[^'\\]*(?:\\\\|\\')?)*'|
-        [a-zA-Z_][a-zA-Z_0-9]*
+        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
+        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
+        [a-zA-Z_][.a-zA-Z_0-9]*
          ''', fix_kv, code)
      res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
      return res
          ''', fix_kv, code)
      res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
      return res
@@ -1416,7 +1599,7 @@ def limit_length(s, length):
  
  
  def version_tuple(v):
  
  
  def version_tuple(v):
-    return [int(e) for e in v.split('.')]
+    return tuple(int(e) for e in re.split(r'[-.]', v))
  
  
  def is_outdated_version(version, limit, assume_new=True):
  
  
  def is_outdated_version(version, limit, assume_new=True):
@@ -1433,3 +1616,171 @@ def ytdl_is_updateable():
      from zipimport import zipimporter
  
      return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
      from zipimport import zipimporter
  
      return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
def args_to_str(args):
    """ Build a short, shell-quoted string representation of a
    subprocess command given as a list of arguments """
    quoted_args = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted_args)
+
+
def mimetype2ext(mt):
    """ Guess a file extension from a MIME type.

    mt is a MIME type such as 'video/mp4', optionally followed by
    parameters ('video/mp4; codecs="avc1"'). The subtype is mapped through
    a small table of known aliases and otherwise returned as is, in lower
    case. Returns None if mt is None.
    """
    if mt is None:
        return None

    # Drop the parameters before looking for the subtype: parameter values
    # may themselves contain '/' and are never part of the extension.
    media_type = mt.split(';')[0].strip()
    _, _, res = media_type.rpartition('/')
    # MIME types are case-insensitive
    res = res.lower()

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)
+
+
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension of the resource behind an opened URL.

    The file name announced in the Content-Disposition header is preferred;
    otherwise the extension is derived from the Content-Type header.
    Returns None if neither header yields an extension.
    """
    try:
        # Use .get() rather than indexing: indexing raises KeyError for a
        # missing header on dict-like header objects.
        getheader = url_handle.headers.get
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    content_type = getheader('Content-Type')
    if content_type is None:
        # Nothing left to derive an extension from
        return None
    return mimetype2ext(content_type)
+
+
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    content_limit is the minimum age required by the content and age_limit
    the age configured by the user; None means "not set" for either of
    them, in which case nothing is blocked.
    """

    return (
        age_limit is not None and
        content_limit is not None and
        age_limit < content_limit)
+
+
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    A leading byte order mark selects the encoding used to decode the data
    (UTF-8 otherwise). The result is truthy (a match object) if the first
    non-whitespace character is '<' and None otherwise.
    """

    # The UTF-32 marks must come before the UTF-16 ones, since the
    # UTF-32-LE mark starts with the UTF-16-LE mark.
    encodings_by_bom = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )

    payload, encoding = first_bytes, 'utf-8'
    for marker, marker_encoding in encodings_by_bom:
        if first_bytes.startswith(marker):
            payload, encoding = first_bytes[len(marker):], marker_encoding
            break

    text = payload.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
+
+
def determine_protocol(info_dict):
    """ Work out the download protocol for an info dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: first from a known streaming scheme prefix, then
    from a manifest file extension, and finally from the URL scheme.
    """
    explicit_protocol = info_dict.get('protocol')
    if explicit_protocol is not None:
        return explicit_protocol

    url = info_dict['url']
    # Prefix checks, so that variants such as rtmpe:// count as rtmp
    for streaming_scheme in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(streaming_scheme):
            return streaming_scheme

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+    """ Render a list of rows, each as a list of values """
+    table = [header_row] + data
+    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+    return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+    COMPARISON_OPERATORS = {
+        '<': operator.lt,
+        '<=': operator.le,
+        '>': operator.gt,
+        '>=': operator.ge,
+        '=': operator.eq,
+        '!=': operator.ne,
+    }
+    operator_rex = re.compile(r'''(?x)\s*
+        (?P<key>[a-z_]+)
+        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+        (?:
+            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+        )
+        \s*$
+        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+    m = operator_rex.search(filter_part)
+    if m:
+        op = COMPARISON_OPERATORS[m.group('op')]
+        if m.group('strval') is not None:
+            if m.group('op') not in ('=', '!='):
+                raise ValueError(
+                    'Operator %s does not support string values!' % m.group('op'))
+            comparison_value = m.group('strval')
+        else:
+            try:
+                comparison_value = int(m.group('intval'))
+            except ValueError:
+                comparison_value = parse_filesize(m.group('intval'))
+                if comparison_value is None:
+                    comparison_value = parse_filesize(m.group('intval') + 'B')
+                if comparison_value is None:
+                    raise ValueError(
+                        'Invalid integer value %r in filter part %r' % (
+                            m.group('intval'), filter_part))
+        actual_value = dct.get(m.group('key'))
+        if actual_value is None:
+            return m.group('none_inclusive')
+        return op(actual_value, comparison_value)
+
+    UNARY_OPERATORS = {
+        '': lambda v: v is not None,
+        '!': lambda v: v is None,
+    }
+    operator_rex = re.compile(r'''(?x)\s*
+        (?P<op>%s)\s*(?P<key>[a-z_]+)
+        \s*$
+        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+    m = operator_rex.search(filter_part)
+    if m:
+        op = UNARY_OPERATORS[m.group('op')]
+        actual_value = dct.get(m.group('key'))
+        return op(actual_value)
+
+    raise ValueError('Invalid filter part %r' % filter_part)
+
+
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str consists of one or more filter expressions joined by '&';
    the dictionary passes only if it passes every one of them.
    """

    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
+
+
def match_filter_func(filter_str):
    """ Build a match filter callback from a filter string.

    The returned function takes an info dict and returns None if it passes
    filter_str, or a message explaining why the video is skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func