X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/eed74133841b43ce901ee0dd31c7dad5a234bd48..bdc21837cde17528bdb3555350f019701333965c:/youtube_dl/utils.py diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5be7cf9..4d3cbac 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -41,6 +41,7 @@ from .compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, + shlex_quote, ) @@ -55,6 +56,7 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } + def preferredencoding(): """Get preferred encoding. @@ -129,7 +131,7 @@ if sys.version_info >= (2, 7): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z-]+$', key) assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val) - expr = xpath + u"[@%s='%s']" % (key, val) + expr = xpath + "[@%s='%s']" % (key, val) return node.find(expr) else: def find_xpath_attr(node, xpath, key, val): @@ -145,6 +147,8 @@ else: # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter + + def xpath_with_ns(path, ns_map): components = [c.split(':') for c in path.split('/')] replaced = [] @@ -236,9 +240,9 @@ def sanitize_open(filename, open_mode): # In case of error, try to remove win32 forbidden chars alt_filename = os.path.join( - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) - for path_part in os.path.split(filename) - ) + re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + for path_part in os.path.split(filename) + ) if alt_filename == filename: raise else: @@ -255,6 +259,7 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp + def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. @@ -287,6 +292,7 @@ def sanitize_filename(s, restricted=False, is_id=False): result = '_' return result + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -371,6 +377,7 @@ def decodeOption(optval): assert isinstance(optval, compat_str) return optval + def formatSeconds(secs): if secs > 3600: return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) @@ -423,6 +430,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): class ExtractorError(Exception): """Error during info extraction.""" + def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): """ tb, if given, is the original traceback (so that it can be printed out). If expected is set, this is a normal error message and most likely not a bug in youtube-dl. @@ -467,6 +475,7 @@ class DownloadError(Exception): configured to continue on errors. They will contain the appropriate error message. """ + def __init__(self, msg, exc_info=None): """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ super(DownloadError, self).__init__(msg) @@ -488,9 +497,11 @@ class PostProcessingError(Exception): This exception may be raised by PostProcessor's .run() method to indicate an error in the postprocessing task. """ + def __init__(self, msg): self.msg = msg + class MaxDownloadsReached(Exception): """ --max-downloads limit has been reached. """ pass @@ -520,6 +531,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(compat_urllib_request.HTTPHandler): """Handler for HTTP requests and responses. @@ -639,7 +651,7 @@ def unified_strdate(date_str): return None upload_date = None - #Replace commas + # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) @@ -680,6 +692,7 @@ def unified_strdate(date_str): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') return upload_date + def determine_ext(url, default_ext='unknown_video'): if url is None: return default_ext @@ -689,9 +702,11 @@ def determine_ext(url, default_ext='unknown_video'): else: return default_ext + def subtitles_filename(filename, sub_lang, sub_format): return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format + def date_from_str(date_str): """ Return a datetime object from a string in the format YYYYMMDD or @@ -706,7 +721,7 @@ def date_from_str(date_str): if sign == '-': time = -time unit = match.group('unit') - #A bad aproximation? + # A bad aproximation? if unit == 'month': unit = 'day' time *= 30 @@ -717,7 +732,8 @@ def date_from_str(date_str): delta = datetime.timedelta(**{unit: time}) return today + delta return datetime.datetime.strptime(date_str, "%Y%m%d").date() - + + def hyphenate_date(date_str): """ Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" @@ -727,8 +743,10 @@ def hyphenate_date(date_str): else: return date_str + class DateRange(object): """Represents a time interval between two dates""" + def __init__(self, start=None, end=None): """start and end must be strings in the format accepted by date""" if start is not None: @@ -741,17 +759,20 @@ class DateRange(object): self.end = datetime.datetime.max.date() if self.start > self.end: raise ValueError('Date range: "%s" , the start date must be before the end date' % self) + @classmethod def day(cls, day): """Returns a range that only contains the given day""" - return cls(day,day) + return cls(day, day) + def __contains__(self, date): """Check if the date is in the range""" if not isinstance(date, datetime.date): date = date_from_str(date) return self.start <= date <= self.end + def __str__(self): - return '%s - %s' % ( self.start.isoformat(), self.end.isoformat()) + return '%s - %s' % (self.start.isoformat(), self.end.isoformat()) def platform_name(): @@ -1025,6 +1046,57 @@ def format_bytes(bytes): return '%.2f%s' % (converted, suffix) +def parse_filesize(s): + if s is None: + return None + + # The lower-case forms are of course incorrect and inofficial, + # but we support those too + _UNIT_TABLE = { + 'B': 1, + 'b': 1, + 'KiB': 1024, + 'KB': 1000, + 'kB': 1024, + 'Kb': 1000, + 'MiB': 1024 ** 2, + 'MB': 1000 ** 2, + 'mB': 1024 ** 2, + 'Mb': 1000 ** 2, + 'GiB': 1024 ** 3, + 'GB': 1000 ** 3, + 'gB': 1024 ** 3, + 'Gb': 1000 ** 3, + 'TiB': 1024 ** 4, + 'TB': 1000 ** 4, + 'tB': 1024 ** 4, + 'Tb': 1000 ** 4, + 'PiB': 1024 ** 5, + 'PB': 1000 ** 5, + 'pB': 1024 ** 5, + 'Pb': 1000 ** 5, + 'EiB': 1024 ** 6, + 'EB': 1000 ** 6, + 'eB': 1024 ** 6, + 'Eb': 1000 ** 6, + 'ZiB': 1024 ** 7, + 'ZB': 1000 ** 7, + 'zB': 1024 ** 7, + 'Zb': 1000 ** 7, + 'YiB': 1024 ** 8, + 'YB': 1000 ** 8, + 'yB': 1024 ** 8, + 'Yb': 1000 ** 8, + } + + units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE) + m = re.match(r'(?P[0-9]+(?:\.[0-9]*)?)\s*(?P%s)' % units_re, s) + if not m: + return None + + return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) + + def get_term_width(): columns = compat_getenv('COLUMNS', None) if columns: @@ -1149,7 +1221,7 @@ def parse_duration(s): def prepend_extension(filename, ext): - name, real_ext = os.path.splitext(filename) + name, real_ext = os.path.splitext(filename) return '{0}.{1}{2}'.format(name, ext, real_ext) @@ -1433,3 +1505,8 @@ def ytdl_is_updateable(): from zipimport import zipimporter return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen') + + +def args_to_str(args): + # Get a short string representation for a subprocess command + return ' '.join(shlex_quote(a) for a in args)