- """Content Too Short exception.
-
- This exception may be raised by FileDownloader objects when a file they
- download is too small for what the server announced first, indicating
- the connection was probably interrupted.
- """
- # Both in bytes
- downloaded = None
- expected = None
-
- def __init__(self, downloaded, expected):
- self.downloaded = downloaded
- self.expected = expected
-
-
class Trouble(Exception):
    """Helper exception used to signal trouble.

    Instances of this exception are meant to be handled with
    FileDownloader.trouble.
    """
    pass
-
class YoutubeDLHandler(urllib2.HTTPHandler):
    """HTTP handler that injects standard headers and decodes compressed bodies.

    Once installed in an OpenerDirector, every outgoing HTTP request gets
    the headers listed in std_headers, and gzip- or deflate-encoded
    responses are transparently decompressed. A request can opt out of
    compression by carrying the "Youtubedl-No-Compression" header; that
    marker header is stripped before the real request is sent.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and then a
        zlib-wrapped one."""
        try:
            inflated = zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            inflated = zlib.decompress(data)
        return inflated

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the given status code."""
        if not hasattr(urllib2.addinfourl, 'getcode'):
            # This addinfourl has no getcode, so it is built without the
            # code argument and the attribute is set afterwards.
            wrapped = urllib2.addinfourl(stream, headers, url)
            wrapped.code = code
            return wrapped
        return urllib2.addinfourl(stream, headers, url, code)

    def http_request(self, req):
        """Apply the standard headers and honour the no-compression marker."""
        for name in std_headers:
            # Drop any same-named header already set, then add ours.
            req.headers.pop(name, None)
            req.add_header(name, std_headers[name])
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decompressing wrapper when encoded."""
        original = resp
        encoding = resp.headers.get('Content-encoding', '')
        if encoding == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
        elif encoding == 'deflate':
            body = StringIO.StringIO(self.deflate(resp.read()))
        else:
            return resp
        # Rewrap the decoded body with the original metadata.
        resp = self.addinfourl_wrapper(body, original.headers, original.url, original.code)
        resp.msg = original.msg
        return resp
+ """Content Too Short exception.
+
+ This exception may be raised by FileDownloader objects when a file they
+ download is too small for what the server announced first, indicating
+ the connection was probably interrupted.
+ """
+ # Both in bytes
+ downloaded = None
+ expected = None
+
+ def __init__(self, downloaded, expected):
+ self.downloaded = downloaded
+ self.expected = expected
+
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """HTTP(S) handler that injects standard headers and decodes compressed bodies.

    Once installed in an OpenerDirector, every outgoing request gets the
    headers listed in std_headers, and gzip- or deflate-encoded responses
    are transparently decompressed. A request can opt out of compression
    by carrying the "Youtubedl-No-Compression" header, and can override
    the user agent with a "Youtubedl-user-agent" header; both marker
    headers are stripped before the real request is sent.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and then a
        zlib-wrapped one."""
        try:
            inflated = zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            inflated = zlib.decompress(data)
        return inflated

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the given status code."""
        if not hasattr(compat_urllib_request.addinfourl, 'getcode'):
            # This addinfourl has no getcode, so it is built without the
            # code argument and the attribute is set afterwards.
            wrapped = compat_urllib_request.addinfourl(stream, headers, url)
            wrapped.code = code
            return wrapped
        return compat_urllib_request.addinfourl(stream, headers, url, code)

    def http_request(self, req):
        """Apply the standard headers and process the Youtubedl-* markers."""
        for name, value in std_headers.items():
            # Drop any same-named header already set, then add ours.
            req.headers.pop(name, None)
            req.add_header(name, value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            # The marker header's value becomes the real User-agent.
            req.headers.pop('User-agent', None)
            req.headers['User-agent'] = req.headers.pop('Youtubedl-user-agent')
        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decompressing wrapper when encoded."""
        original = resp
        encoding = resp.headers.get('Content-encoding', '')
        if encoding == 'gzip':
            body = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
        elif encoding == 'deflate':
            body = io.BytesIO(self.deflate(resp.read()))
        else:
            return resp
        # Rewrap the decoded body with the original metadata.
        resp = self.addinfourl_wrapper(body, original.headers, original.url, original.code)
        resp.msg = original.msg
        return resp

    # HTTPS traffic goes through exactly the same processing.
    https_request = http_request
    https_response = http_response
+
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD.

    Commas are replaced by spaces and a trailing UTC offset (such as
    " +0000") is stripped before the string is tried against a fixed list
    of known date formats.

    Returns None when date_str is None or matches none of the formats.
    """
    if date_str is None:
        return None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = (
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
    )
    for expression in format_expressions:
        try:
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Not this format (or not representable); try the next one.
            continue
    return None
+
def determine_ext(url):
    """Guess the file extension from a URL.

    The candidate is whatever follows the last dot of the part of the URL
    that precedes the first '?'. It is returned only if it is purely
    alphanumeric; otherwise u'unknown_video' is returned.
    """
    without_query = url.split(u'?', 1)[0]
    candidate = without_query.rsplit(u'.', 1)[-1]
    if not re.match(r'^[A-Za-z0-9]+$', candidate):
        return u'unknown_video'
    return candidate
+
def date_from_str(date_str):
    """Return a datetime.date object from a string.

    Accepted formats are YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years in the relative form are approximated as 30 and 365
    days respectively. Raises ValueError (from strptime) when the string
    is neither a relative expression nor a YYYYMMDD date.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    # Raw string so that \d is not treated as an (invalid) string escape.
    match = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # timedelta has no month/year arguments, so approximate in days.
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta keyword arguments are plural: days, weeks.
        delta = datetime.timedelta(**{unit + 's': amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
+
class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """Build the range from two optional date strings.

        start and end must be strings in the format accepted by
        date_from_str. A missing start defaults to the earliest
        representable date and a missing end to the latest one.

        Raises ValueError if start is later than end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range.

        date may be a datetime.date, a datetime.datetime (only its date
        part is considered) or a string accepted by date_from_str.
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date but cannot be compared with
            # plain date bounds, so reduce it to its date part first.
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())