+
+
def format_bytes(bytes):
    """Format a byte count as a human-readable string with a binary suffix.

    ``bytes`` may be a number, a numeric string or None.  None yields
    'N/A'; anything else is rendered with two decimals and the largest
    suffix (B, KiB, ... YiB) for which the scaled magnitude is at least 1.
    """
    if bytes is None:
        return 'N/A'
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    value = float(bytes)
    # Repeated division by 1024 (a power of two) avoids the rounding issues
    # of math.log() and keeps the exponent inside the suffix table: values
    # below 1 stay in 'B', huge values stay in 'YiB', and negative values
    # are scaled by their magnitude instead of raising.
    magnitude = abs(value)
    exponent = 0
    while magnitude >= 1024.0 and exponent < len(suffixes) - 1:
        magnitude /= 1024.0
        exponent += 1
    converted = value / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
+
+
+def parse_filesize(s):
+ if s is None:
+ return None
+
+ # The lower-case forms are of course incorrect and inofficial,
+ # but we support those too
+ _UNIT_TABLE = {
+ 'B': 1,
+ 'b': 1,
+ 'KiB': 1024,
+ 'KB': 1000,
+ 'kB': 1024,
+ 'Kb': 1000,
+ 'MiB': 1024 ** 2,
+ 'MB': 1000 ** 2,
+ 'mB': 1024 ** 2,
+ 'Mb': 1000 ** 2,
+ 'GiB': 1024 ** 3,
+ 'GB': 1000 ** 3,
+ 'gB': 1024 ** 3,
+ 'Gb': 1000 ** 3,
+ 'TiB': 1024 ** 4,
+ 'TB': 1000 ** 4,
+ 'tB': 1024 ** 4,
+ 'Tb': 1000 ** 4,
+ 'PiB': 1024 ** 5,
+ 'PB': 1000 ** 5,
+ 'pB': 1024 ** 5,
+ 'Pb': 1000 ** 5,
+ 'EiB': 1024 ** 6,
+ 'EB': 1000 ** 6,
+ 'eB': 1024 ** 6,
+ 'Eb': 1000 ** 6,
+ 'ZiB': 1024 ** 7,
+ 'ZB': 1000 ** 7,
+ 'zB': 1024 ** 7,
+ 'Zb': 1000 ** 7,
+ 'YiB': 1024 ** 8,
+ 'YB': 1000 ** 8,
+ 'yB': 1024 ** 8,
+ 'Yb': 1000 ** 8,
+ }
+
+ units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
+ m = re.match(
+ r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+ if not m:
+ return None
+
+ num_str = m.group('num').replace(',', '.')
+ mult = _UNIT_TABLE[m.group('unit')]
+ return int(float(num_str) * mult)
+
+
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable takes precedence; if it is unset,
    empty or not an integer, the output of ``stty size`` is used instead.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            pass  # Garbage in COLUMNS; fall back to asking stty

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = sp.communicate()
        # The second whitespace-separated field of the output is used
        return int(out.split()[1])
    except Exception:
        # Best effort only (stty missing, no tty, unexpected output, ...).
        # Unlike a bare except, this lets KeyboardInterrupt and SystemExit
        # propagate.
        return None
+
+
def month_by_name(name):
    """ Map an English month name to its number (1-12), independent of locale.

    Returns None when the name is not a full, capitalized English month name.
    """
    months = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December')
    for number, month in enumerate(months, 1):
        if month == name:
            return number
    return None
+
+
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'.

    Ampersands that already introduce one of the predefined entities
    (amp, lt, gt, apos, quot) or a numeric character reference of up to
    four digits are left untouched.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
+
+
def setproctitle(title):
    """Set the name of the current process via libc's prctl().

    ``title`` must be a text string.  The call is silently skipped when
    libc.so.6 cannot be loaded or does not expose prctl().
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes ctypes allocate one
    # extra byte for the terminating NUL.  Sizing the buffer to exactly
    # len(title_bytes) would hand prctl() an unterminated string.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME in <linux/prctl.h>
    except AttributeError:
        return  # Strange libc, just skip this
+
+
def remove_start(s, start):
    """Return ``s`` without the leading ``start``; unchanged if it is absent."""
    return s[len(start):] if s.startswith(start) else s
+
+
def remove_end(s, end):
    """Return ``s`` without the trailing ``end``; unchanged if it is absent."""
    # An empty suffix has to be excluded explicitly: every string ends with
    # '', and s[:-0] is s[:0], which would wipe the whole string.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
+
+
def url_basename(url):
    """Return the last component of the path of ``url``.

    Leading and trailing slashes of the path are ignored.
    """
    parsed = compat_urlparse.urlparse(url)
    components = parsed.path.strip('/').split('/')
    return components[-1]
+
+
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is reported as HEAD."""

    def get_method(self):
        return 'HEAD'
+
+
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert ``v`` to an int, falling back to ``default``.

    If ``get_attr`` is given, the attribute of that name is read from ``v``
    first.  None, the empty string and values that int() cannot convert
    all yield ``default``.  The result is ``int(v) * invscale // scale``.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        number = int(v)
    except (ValueError, TypeError):
        # Unparsable input is treated like a missing value
        return default
    return number * invscale // scale
+
+
def str_or_none(v, default=None):
    """Convert ``v`` to a text string; None yields ``default`` instead."""
    if v is None:
        return default
    return compat_str(v)
+
+
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Commas, dots and plus signs are removed before the conversion; None is
    passed through unchanged. """
    if int_str is not None:
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int(digits)
    return None
+
+
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert ``v`` to a float, falling back to ``default``.

    None and values that float() cannot convert (including the empty
    string) yield ``default``.  The result is ``float(v) * invscale / scale``.
    """
    if v is None:
        return default
    try:
        number = float(v)
    except (ValueError, TypeError):
        # Unparsable input is treated like a missing value
        return default
    return number * invscale / scale
+
+
def parse_duration(s):
    """Parse a textual duration (e.g. '1:23:45' or '3 min') into seconds.

    Returns None when ``s`` is not a string or does not match the
    supported formats.  Inputs consisting only of minutes or only of hours
    may be fractional and are returned as floats; otherwise the result is
    an int unless a fractional seconds part is present.
    """
    if not isinstance(s, compat_basestring):
        return None

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s.strip())
    if not m:
        return None

    # Single-unit forms are handled on their own
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)

    seconds_per_unit = (
        ('secs', 1),
        ('mins', 60),
        ('hours', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    total = 0
    for group_name, factor in seconds_per_unit:
        value = m.group(group_name)
        if value:
            total += int(value) * factor

    # The 'ms' group holds the fractional part including its leading dot
    fraction = m.group('ms')
    if fraction:
        total += float(fraction)
    return total
+
+
def prepend_extension(filename, ext):
    """Insert ``ext`` in front of the existing extension of ``filename``.

    For example 'video.mp4' with ext 'temp' becomes 'video.temp.mp4'.
    """
    parts = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(parts[0], ext, parts[1])
+
+
def check_executable(exe, args=()):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)

    Returns False when the binary cannot be started (OSError). """
    # The default is an immutable tuple rather than a shared mutable list;
    # list(args) accepts any sequence passed by callers.
    command = [exe] + list(args)
    try:
        subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
+
+
def get_exe_version(exe, args=('--version',),
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    The executable is run with ``args`` and its combined stdout/stderr is
    handed to detect_exe_version() together with ``version_re`` and
    ``unrecognized``. """
    # The default is an immutable tuple rather than a shared mutable list;
    # list(args) accepts any sequence passed by callers.
    command = [exe] + list(args)
    try:
        out, _ = subprocess.Popen(
            command,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    # The pipe delivers bytes; detect_exe_version() wants text
    if isinstance(out, bytes):
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
+
+
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the output of a program.

    ``version_re`` must contain one capturing group; when it is None a
    pattern matching 'version <something>' is used.  If nothing matches,
    ``unrecognized`` is returned.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
+
+
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
+
+
class OnDemandPagedList(PagedList):
    """Paged list that fetches its pages lazily through ``pagefunc``.

    ``pagefunc(pagenum)`` is called with a zero-based page number and must
    return an iterable of that page's entries.  A page with fewer than
    ``pagesize`` entries is taken to be the last one.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        ``end=None`` means "up to the last entry".
        """
        # An empty or inverted range needs no page at all.  Without this
        # guard, a range whose end coincides with the first index of the
        # starting page (e.g. getslice(0, 0)) was treated as covering that
        # whole page and everything after it.
        if end is not None and end <= start:
            return []

        pagesize = self._pagesize
        res = []
        for pagenum in itertools.count(start // pagesize):
            firstid = pagenum * pagesize
            nextfirstid = firstid + pagesize

            page_results = list(self._pagefunc(pagenum))

            # Offsets of the requested range within this page
            if firstid <= start < nextfirstid:
                startv = start % pagesize
            else:
                startv = 0
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
+
+
class InAdvancePagedList(PagedList):
    """Paged list whose number of pages is known in advance.

    ``pagefunc(pagenum)`` is called with a zero-based page number below
    ``pagecount`` and must return an iterable of that page's entries.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        ``end=None`` means "up to the last entry".
        """
        res = []
        start_page = start // self._pagesize
        if end is None:
            end_page = self._pagecount
        else:
            # Never request pages beyond the known page count, even when
            # ``end`` lies past the last entry
            end_page = min(self._pagecount, end // self._pagesize + 1)
        # Entries to drop from the front of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Entries still wanted; None means "no upper bound"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) >= only_more:
                    # This page completes the requested range
                    res.extend(page[:only_more])
                    break
                only_more -= len(page)
            res.extend(page)
        return res
+
+
def uppercase_escape(s):
    """Decode the eight-digit upper-case 'U' escape sequences found in ``s``.

    Only sequences made of a backslash, 'U' and exactly eight hexadecimal
    digits are replaced; everything else is left as is.
    """
    decode = codecs.getdecoder('unicode_escape')

    def replace_escape(match):
        decoded, _ = decode(match.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', replace_escape, s)
+
+
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    running_python2 = sys.version_info < (3, 0)
    if running_python2 and isinstance(s, compat_str):
        # On Python 2, text is encoded to UTF-8 bytes before quoting
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
+
+
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Only the path, params, query and fragment components are escaped."""
    parts = compat_urllib_parse_urlparse(url)
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
+
# struct_pack/struct_unpack behave like struct.pack/struct.unpack but also
# accept a text format string on interpreters where struct rejects it.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    struct_pack, struct_unpack = struct.pack, struct.unpack
+
+
def read_batch_urls(batch_fd):
    """Read the URLs from a batch file object, closing it afterwards.

    Each line is decoded as UTF-8 if necessary, stripped of a leading byte
    order mark and of surrounding whitespace.  Empty lines and lines
    starting with '#', ';' or ']' are skipped.  Returns a list of URLs.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 BOM appears as U+FEFF once the line has been decoded as
        # UTF-8, and as the three characters EF BB BF when the bytes were
        # decoded with a single-byte encoding.  Previously only the second
        # form was removed.
        # NOTE(review): assumes plain string literals are text here
        # (unicode_literals on Python 2) - confirm at the top of the file.
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
+
+
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes.

    All arguments are passed on to compat_urllib_parse.urlencode().
    """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
+
+
# etree_iter(node) iterates over the elements of an ElementTree (sub)tree.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
+
+
def parse_xml(s):
    """Parse the XML text ``s`` and return its root element.

    Doctype declarations are ignored.  On Python 2, element texts that
    come back as byte strings are decoded to text.
    """
    class _DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=_DoctypeIgnoringBuilder())
    xml_kwargs = {}
    if sys.version_info >= (2, 7):
        xml_kwargs['parser'] = parser
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **xml_kwargs)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return tree
+
+
# Age limit associated with each US rating label (see parse_age_limit)
US_RATINGS = {
    'G': 0, 'PG': 10, 'PG-13': 13, 'R': 16, 'NC': 18,
}
+
+
def parse_age_limit(s):
    """Parse an age limit such as '18' or '16+', or a US rating label.

    Returns the age as an int, or None when ``s`` is None or not recognized.
    """
    if s is None:
        return None
    match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if match:
        return int(match.group('age'))
    return US_RATINGS.get(s, None)
+
+
def strip_jsonp(code):
    """Strip a JSONP wrapper, i.e. turn 'callback(<payload>);' into '<payload>'.

    Trailing '//' comments after the call are removed as well.  Input that
    does not look like a JSONP call is returned unchanged.
    """
    jsonp_re = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_re, r'\1', code)
+
+
def js_to_json(code):
    """Convert JavaScript object/array literal syntax towards JSON.

    Bare identifiers (other than true/false/null) are double-quoted,
    single-quoted strings are converted to double-quoted ones, and a
    trailing comma in front of a closing ']' is dropped.
    """
    # Rewrites needed inside a single-quoted string once it is going to be
    # wrapped in double quotes instead
    single_quoted_fixes = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: single_quoted_fixes[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    quoted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', r'\1', quoted)
+
+
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its index in
    ``quality_ids``, or to -1 when the id is not in the list. """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
+
+
# Default output template; a %-format string with the keys title, id and ext
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+
+
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Returns ``s`` unchanged when it has at most ``length`` characters (or
    is None); otherwise a string of exactly ``length`` characters ending
    in '...'.  When ``length`` leaves no room for any text, only as much
    of the ellipsis as fits is returned. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length <= len(ELLIPSES):
        # Slicing with a negative bound here would take characters off the
        # end of s instead, making the result longer than ``length``
        return ELLIPSES[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES
+
+
def version_tuple(v):
    """Split a version string at '.' and '-' into a tuple of ints."""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
+
+
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether ``version`` is older than ``limit``.

    When ``version`` is empty or either version cannot be parsed, the
    answer is ``not assume_new``.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
+
+
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U

    That is the case when this module was loaded by a zipimporter or when
    ``sys`` has a ``frozen`` attribute. """
    from zipimport import zipimporter

    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
+
+
def args_to_str(args):
    """Get a short string representation for a subprocess command.

    Every argument is quoted with shlex_quote() and the results are joined
    with single spaces.
    """
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
+
+
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of an opened URL.

    A filename given in an 'attachment' Content-Disposition header takes
    precedence; otherwise the subtype of the Content-Type header is used,
    without any parameters.  Returns None when neither header helps.
    """
    try:
        url_handle.headers
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    content_type = getheader('Content-Type')
    if not content_type:
        return None
    # Drop parameters such as '; charset=utf-8' before taking the subtype,
    # and tolerate values without a '/'
    mime_type = content_type.split(';')[0].strip()
    return mime_type.partition('/')[2] or None
+
+
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age limit is set or when the content has
    no limit of its own. """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
+
+
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The result is a regex match object (truthy) when the decoded text
    starts with optional whitespace followed by '<', and None otherwise. """

    # Longer BOMs come before the shorter ones they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a BOM the data is decoded as UTF-8
    payload, encoding = first_bytes, 'utf-8'
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            payload, encoding = first_bytes[len(bom):], enc
            break

    text = payload.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
+
+
def determine_protocol(info_dict):
    """Determine the download protocol for ``info_dict``.

    An explicit 'protocol' entry wins.  Otherwise the protocol is derived
    from the start of the URL (rtmp, mms, rtsp), then from its extension
    (m3u8, f4m), and finally from the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
+
+
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values

    Every column except the last is left-aligned and padded to the width
    of its longest cell plus one; rows are joined by newlines. """
    table = [header_row] + data
    columns = zip(*table)
    max_lens = [
        max(len(compat_str(cell)) for cell in column) for column in columns]
    padded = ['%-' + compat_str(width + 1) + 's' for width in max_lens[:-1]]
    format_str = ' '.join(padded) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)