Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import errno
   5 import gzip
   6 import io
   7 import json
   8 import locale
   9 import os
  10 import re
  11 import sys
  12 import traceback
  13 import zlib
  14 import email.utils
  15 import socket
  16 import datetime
  17
  18 try:
  19     import urllib.request as compat_urllib_request
  20 except ImportError: # Python 2
  21     import urllib2 as compat_urllib_request
  22
  23 try:
  24     import urllib.error as compat_urllib_error
  25 except ImportError: # Python 2
  26     import urllib2 as compat_urllib_error
  27
  28 try:
  29     import urllib.parse as compat_urllib_parse
  30 except ImportError: # Python 2
  31     import urllib as compat_urllib_parse
  32
  33 try:
  34     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  35 except ImportError: # Python 2
  36     from urlparse import urlparse as compat_urllib_parse_urlparse
  37
  38 try:
  39     import urllib.parse as compat_urlparse
  40 except ImportError: # Python 2
  41     import urlparse as compat_urlparse
  42
  43 try:
  44     import http.cookiejar as compat_cookiejar
  45 except ImportError: # Python 2
  46     import cookielib as compat_cookiejar
  47
  48 try:
  49     import html.entities as compat_html_entities
  50 except ImportError: # Python 2
  51     import htmlentitydefs as compat_html_entities
  52
  53 try:
  54     import html.parser as compat_html_parser
  55 except ImportError: # Python 2
  56     import HTMLParser as compat_html_parser
  57
  58 try:
  59     import http.client as compat_http_client
  60 except ImportError: # Python 2
  61     import httplib as compat_http_client
  62
# compat_subprocess_get_DEVNULL() returns an object usable as a "discard
# everything" target for subprocess streams.
try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    # subprocess has no DEVNULL here: open the null device for writing instead.
    # Every call opens a new file object.
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  68
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Runs of consecutive escapes are collected as bytes and decoded
        together using encoding/errors (None selects 'utf-8'/'replace').
        An invalid escape is kept in the output as literal text.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            # No '%' at all: nothing to unquote.
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # Python 2 only: decode the two hex digits following the '%'.
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                # Not a valid escape: put the '%' back and keep the text as is.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless keep_blank_values is set. With strict_parsing, a field
        without '=' raises ValueError; otherwise it is skipped (or kept with
        an empty value when keep_blank_values is set).
        """
        # Results are coerced to unicode (this branch only runs on Python 2).
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                # '+' encodes a space; it is replaced before percent-unquoting.
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values.

        Values of a repeated name are appended in order of appearance.
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                        encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
 147
# compat_str: the text string type (unicode where that name exists, else str).
try:
    compat_str = unicode # Python 2
except NameError:
    compat_str = str

# compat_chr: code point -> one-character text string
# (unichr where that name exists, else chr).
try:
    compat_chr = unichr # Python 2
except NameError:
    compat_chr = chr
 157
 158 def compat_ord(c):
 159     if type(c) is int: return c
 160     else: return ord(c)
 161
# The type of compiled regular expression objects, obtained by compiling an
# empty pattern (this is not clearly defined otherwise).
compiled_regex_type = type(re.compile(''))
 164
# Default HTTP headers; YoutubeDLHandler.http_request adds each of them to
# every outgoing request.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
 172
 173 def preferredencoding():
 174     """Get preferred encoding.
 175
 176     Returns the best encoding scheme for the system, based on
 177     locale.getpreferredencoding() and some further tweaks.
 178     """
 179     try:
 180         pref = locale.getpreferredencoding()
 181         u'TEST'.encode(pref)
 182     except:
 183         pref = 'UTF-8'
 184
 185     return pref
 186
 187 if sys.version_info < (3,0):
 188     def compat_print(s):
 189         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 190 else:
 191     def compat_print(s):
 192         assert type(s) == type(u'')
 193         print(s)
 194
 195 # In Python 2.x, json.dump expects a bytestream.
 196 # In Python 3.x, it writes to a character stream
 197 if sys.version_info < (3,0):
 198     def write_json_file(obj, fn):
 199         with open(fn, 'wb') as f:
 200             json.dump(obj, f)
 201 else:
 202     def write_json_file(obj, fn):
 203         with open(fn, 'w', encoding='utf-8') as f:
 204             json.dump(obj, f)
 205
 206 if sys.version_info >= (2,7):
 207     def find_xpath_attr(node, xpath, key, val):
 208         """ Find the xpath xpath[@key=val] """
 209         assert re.match(r'^[a-zA-Z]+$', key)
 210         assert re.match(r'^[a-zA-Z@\s]*$', val)
 211         expr = xpath + u"[@%s='%s']" % (key, val)
 212         return node.find(expr)
 213 else:
 214     def find_xpath_attr(node, xpath, key, val):
 215         for f in node.findall(xpath):
 216             if f.attrib.get(key) == val:
 217                 return f
 218         return None
 219
 220 def htmlentity_transform(matchobj):
 221     """Transforms an HTML entity to a character.
 222
 223     This function receives a match object and is intended to be used with
 224     the re.sub() function.
 225     """
 226     entity = matchobj.group(1)
 227
 228     # Known non-numeric HTML entity
 229     if entity in compat_html_entities.name2codepoint:
 230         return compat_chr(compat_html_entities.name2codepoint[entity])
 231
 232     mobj = re.match(u'(?u)#(x?\\d+)', entity)
 233     if mobj is not None:
 234         numstr = mobj.group(1)
 235         if numstr.startswith(u'x'):
 236             base = 16
 237             numstr = u'0%s' % numstr
 238         else:
 239             base = 10
 240         return compat_chr(int(numstr, base))
 241
 242     # Unknown entity in name, return its literal representation
 243     return (u'&%s;' % entity)
 244
# Replace the parser module's start-tag matching regex with a corrected one.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class AttrParser(compat_html_parser.HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute

    Feed a document with loads() and read the text between the matching
    tag's opening and closing tags with get_result().
    """
    def __init__(self, attribute, value):
        """Look for the first tag whose attribute `attribute` equals `value`."""
        self.attribute = attribute
        self.value = value
        # Filled in as [tag, start position, end position] while parsing.
        self.result = None
        # True while the parser is inside the matching tag.
        self.started = False
        # Number of currently open tags per tag name, counted from the match on.
        self.depth = {}
        self.html = None
        # True until the position right after the matching start tag is stored.
        self.watch_startpos = False
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by skipping a line of the input.

        Raises HTMLParseError instead once more than 10 errors were skipped
        or when the error happens inside the matching tag.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def loads(self, html):
        """Parse the complete document html."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # A nested tag follows the matching one directly: content starts here.
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The matching tag is closed once its own tag name is balanced again.
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other parser event can be the first one after the opening tag.
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the recorded start and end positions.

        Returns None when no matching tag was found or when its start or end
        position was not recorded.
        """
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        # Positions are (line, column) pairs from getpos(); lines count from 1.
        lines = self.html.split('\n')
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end are on the same line: the end column has to be
            # shifted by the part already cut off at the front.
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# On Python older than 2.7.3, step over the literal text "</scr'+'ipt>"
# instead of handing it to the regular end tag parsing.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
 317
 318 def get_element_by_id(id, html):
 319     """Return the content of the tag with the specified ID in the passed HTML document"""
 320     return get_element_by_attribute("id", id, html)
 321
 322 def get_element_by_attribute(attribute, value, html):
 323     """Return the content of the tag with the specified attribute in the passed HTML document"""
 324     parser = AttrParser(attribute, value)
 325     try:
 326         parser.loads(html)
 327     except compat_html_parser.HTMLParseError:
 328         pass
 329     return parser.get_result()
 330
 331
 332 def clean_html(html):
 333     """Clean an HTML snippet into a readable string"""
 334     # Newline vs <br />
 335     html = html.replace('\n', ' ')
 336     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 337     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 338     # Strip html tags
 339     html = re.sub('<.*?>', '', html)
 340     # Replace html entities
 341     html = unescapeHTML(html)
 342     return html.strip()
 343
 344
 345 def sanitize_open(filename, open_mode):
 346     """Try to open the given filename, and slightly tweak it if this fails.
 347
 348     Attempts to open the given filename. If this fails, it tries to change
 349     the filename slightly, step by step, until it's either able to open it
 350     or it fails and raises a final exception, like the standard open()
 351     function.
 352
 353     It returns the tuple (stream, definitive_file_name).
 354     """
 355     try:
 356         if filename == u'-':
 357             if sys.platform == 'win32':
 358                 import msvcrt
 359                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 360             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 361         stream = open(encodeFilename(filename), open_mode)
 362         return (stream, filename)
 363     except (IOError, OSError) as err:
 364         if err.errno in (errno.EACCES,):
 365             raise
 366
 367         # In case of error, try to remove win32 forbidden chars
 368         alt_filename = os.path.join(
 369                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 370                         for path_part in os.path.split(filename)
 371                        )
 372         if alt_filename == filename:
 373             raise
 374         else:
 375             # An exception here should be caught in the caller
 376             stream = open(encodeFilename(filename), open_mode)
 377             return (stream, alt_filename)
 378
 379
 380 def timeconvert(timestr):
 381     """Convert RFC 2822 defined time string into system timestamp"""
 382     timestamp = None
 383     timetuple = email.utils.parsedate_tz(timestr)
 384     if timetuple is not None:
 385         timestamp = email.utils.mktime_tz(timetuple)
 386     return timestamp
 387
 388 def sanitize_filename(s, restricted=False, is_id=False):
 389     """Sanitizes a string so it could be used as part of a filename.
 390     If restricted is set, use a stricter subset of allowed characters.
 391     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 392     """
 393     def replace_insane(char):
 394         if char == '?' or ord(char) < 32 or ord(char) == 127:
 395             return ''
 396         elif char == '"':
 397             return '' if restricted else '\''
 398         elif char == ':':
 399             return '_-' if restricted else ' -'
 400         elif char in '\\/|*<>':
 401             return '_'
 402         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 403             return '_'
 404         if restricted and ord(char) > 127:
 405             return '_'
 406         return char
 407
 408     result = u''.join(map(replace_insane, s))
 409     if not is_id:
 410         while '__' in result:
 411             result = result.replace('__', '_')
 412         result = result.strip('_')
 413         # Common case of "Foreign band name - English song title"
 414         if restricted and result.startswith('-_'):
 415             result = result[2:]
 416         if not result:
 417             result = '_'
 418     return result
 419
 420 def orderedSet(iterable):
 421     """ Remove all duplicates from the input iterable """
 422     res = []
 423     for el in iterable:
 424         if el not in res:
 425             res.append(el)
 426     return res
 427
 428 def unescapeHTML(s):
 429     """
 430     @param s a string
 431     """
 432     assert type(s) == type(u'')
 433
 434     result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
 435     return result
 436
 437 def encodeFilename(s):
 438     """
 439     @param s The name of the file
 440     """
 441
 442     assert type(s) == type(u'')
 443
 444     # Python 3 has a Unicode API
 445     if sys.version_info >= (3, 0):
 446         return s
 447
 448     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 449         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 450         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 451         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 452         return s
 453     else:
 454         encoding = sys.getfilesystemencoding()
 455         if encoding is None:
 456             encoding = 'utf-8'
 457         return s.encode(encoding, 'ignore')
 458
 459 def decodeOption(optval):
 460     if optval is None:
 461         return optval
 462     if isinstance(optval, bytes):
 463         optval = optval.decode(preferredencoding())
 464
 465     assert isinstance(optval, compat_str)
 466     return optval
 467
 468 def formatSeconds(secs):
 469     if secs > 3600:
 470         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 471     elif secs > 60:
 472         return '%d:%02d' % (secs // 60, secs % 60)
 473     else:
 474         return '%d' % secs
 475
 476 def make_HTTPS_handler(opts):
 477     if sys.version_info < (3,2):
 478         # Python's 2.x handler is very simplistic
 479         return compat_urllib_request.HTTPSHandler()
 480     else:
 481         import ssl
 482         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 483         context.set_default_verify_paths()
 484
 485         context.verify_mode = (ssl.CERT_NONE
 486                                if opts.no_check_certificate
 487                                else ssl.CERT_REQUIRED)
 488         return compat_urllib_request.HTTPSHandler(context=context)
 489
 490 class ExtractorError(Exception):
 491     """Error during info extraction."""
 492     def __init__(self, msg, tb=None, expected=False):
 493         """ tb, if given, is the original traceback (so that it can be printed out).
 494         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 495         """
 496
 497         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 498             expected = True
 499         if not expected:
 500             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 501         super(ExtractorError, self).__init__(msg)
 502
 503         self.traceback = tb
 504         self.exc_info = sys.exc_info()  # preserve original exception
 505
 506     def format_traceback(self):
 507         if self.traceback is None:
 508             return None
 509         return u''.join(traceback.format_tb(self.traceback))
 510
 511
 512 class DownloadError(Exception):
 513     """Download Error exception.
 514
 515     This exception may be thrown by FileDownloader objects if they are not
 516     configured to continue on errors. They will contain the appropriate
 517     error message.
 518     """
 519     def __init__(self, msg, exc_info=None):
 520         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 521         super(DownloadError, self).__init__(msg)
 522         self.exc_info = exc_info
 523
 524
 525 class SameFileError(Exception):
 526     """Same File exception.
 527
 528     This exception will be thrown by FileDownloader objects if they detect
 529     multiple files would have to be downloaded to the same file on disk.
 530     """
 531     pass
 532
 533
 534 class PostProcessingError(Exception):
 535     """Post Processing exception.
 536
 537     This exception may be raised by PostProcessor's .run() method to
 538     indicate an error in the postprocessing task.
 539     """
 540     def __init__(self, msg):
 541         self.msg = msg
 542
 543 class MaxDownloadsReached(Exception):
 544     """ --max-downloads limit has been reached. """
 545     pass
 546
 547
 548 class UnavailableVideoError(Exception):
 549     """Unavailable Format exception.
 550
 551     This exception will be thrown when a video is requested
 552     in a format that is not available for that video.
 553     """
 554     pass
 555
 556
 557 class ContentTooShortError(Exception):
 558     """Content Too Short exception.
 559
 560     This exception may be raised by FileDownloader objects when a file they
 561     download is too small for what the server announced first, indicating
 562     the connection was probably interrupted.
 563     """
 564     # Both in bytes
 565     downloaded = None
 566     expected = None
 567
 568     def __init__(self, downloaded, expected):
 569         self.downloaded = downloaded
 570         self.expected = expected
 571
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate-encoded data and return the result."""
        try:
            # First try with negative window bits ...
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # ... and fall back to zlib's default settings.
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response object carrying the status code."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        # No getcode() on this addinfourl: build it without the code and set
        # the attribute by hand.
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Add the standard headers to req and process the Youtubedl-* pseudo headers."""
        for h,v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        # NOTE(review): the lookups below use names with only the first letter
        # capitalized, presumably the form in which the request object stores
        # header names - confirm.
        if 'Youtubedl-no-compression' in req.headers:
            # Compression is unwanted for this request: do not advertise it.
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            # Per-request override of the User-Agent header.
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        """Return resp, wrapped into a decompressing response if its content is gzip or deflate encoded."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            # The whole body is read into memory and exposed through a GzipFile.
            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            # The whole body is read and decompressed in memory.
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
 637
 638 def unified_strdate(date_str):
 639     """Return a string with the date in the format YYYYMMDD"""
 640     upload_date = None
 641     #Replace commas
 642     date_str = date_str.replace(',',' ')
 643     # %z (UTC offset) is only supported in python>=3.2
 644     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
 645     format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
 646     for expression in format_expressions:
 647         try:
 648             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 649         except:
 650             pass
 651     return upload_date
 652
 653 def determine_ext(url, default_ext=u'unknown_video'):
 654     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 655     if re.match(r'^[A-Za-z0-9]+$', guess):
 656         return guess
 657     else:
 658         return default_ext
 659
 660 def date_from_str(date_str):
 661     """
 662     Return a datetime object from a string in the format YYYYMMDD or
 663     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 664     today = datetime.date.today()
 665     if date_str == 'now'or date_str == 'today':
 666         return today
 667     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 668     if match is not None:
 669         sign = match.group('sign')
 670         time = int(match.group('time'))
 671         if sign == '-':
 672             time = -time
 673         unit = match.group('unit')
 674         #A bad aproximation?
 675         if unit == 'month':
 676             unit = 'day'
 677             time *= 30
 678         elif unit == 'year':
 679             unit = 'day'
 680             time *= 365
 681         unit += 's'
 682         delta = datetime.timedelta(**{unit: time})
 683         return today + delta
 684     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 685
 686 class DateRange(object):
 687     """Represents a time interval between two dates"""
 688     def __init__(self, start=None, end=None):
 689         """start and end must be strings in the format accepted by date"""
 690         if start is not None:
 691             self.start = date_from_str(start)
 692         else:
 693             self.start = datetime.datetime.min.date()
 694         if end is not None:
 695             self.end = date_from_str(end)
 696         else:
 697             self.end = datetime.datetime.max.date()
 698         if self.start > self.end:
 699             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 700     @classmethod
 701     def day(cls, day):
 702         """Returns a range that only contains the given day"""
 703         return cls(day,day)
 704     def __contains__(self, date):
 705         """Check if the date is in the range"""
 706         if not isinstance(date, datetime.date):
 707             date = date_from_str(date)
 708         return self.start <= date <= self.end
 709     def __str__(self):
 710         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())