+
+
+def read_batch_urls(batch_fd):
+ def fixup(url):
+ if not isinstance(url, compat_str):
+ url = url.decode('utf-8', 'replace')
+ BOM_UTF8 = '\xef\xbb\xbf'
+ if url.startswith(BOM_UTF8):
+ url = url[len(BOM_UTF8):]
+ url = url.strip()
+ if url.startswith(('#', ';', ']')):
+ return False
+ return url
+
+ with contextlib.closing(batch_fd) as fd:
+ return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+ return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+
+
+try:
+ etree_iter = xml.etree.ElementTree.Element.iter
+except AttributeError: # Python <=2.6
+ etree_iter = lambda n: n.findall('.//*')
+
+
+def parse_xml(s):
+ class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass # Ignore doctypes
+
+ parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
+ kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
+ tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+ # Fix up XML parser in Python 2.x
+ if sys.version_info < (3, 0):
+ for n in etree_iter(tree):
+ if n.text is not None:
+ if not isinstance(n.text, compat_str):
+ n.text = n.text.decode('utf-8')
+ return tree
+
+
+US_RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+}
+
+
+def parse_age_limit(s):
+ if s is None:
+ return None
+ m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
+ return int(m.group('age')) if m else US_RATINGS.get(s, None)
+
+
+def strip_jsonp(code):
+ return re.sub(
+ r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+
+
+def js_to_json(code):
+ def fix_kv(m):
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ if v.startswith('"'):
+ return v
+ if v.startswith("'"):
+ v = v[1:-1]
+ v = re.sub(r"\\\\|\\'|\"", lambda m: {
+ '\\\\': '\\\\',
+ "\\'": "'",
+ '"': '\\"',
+ }[m.group(0)], v)
+ return '"%s"' % v
+
+ res = re.sub(r'''(?x)
+ "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
+ [a-zA-Z_][.a-zA-Z_0-9]*
+ ''', fix_kv, code)
+ res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+ return res
+
+
+def qualities(quality_ids):
+ """ Get a numeric quality value out of a list of possible values """
+ def q(qid):
+ try:
+ return quality_ids.index(qid)
+ except ValueError:
+ return -1
+ return q
+
+
+DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+
+
+def limit_length(s, length):
+ """ Add ellipses to overly long strings """
+ if s is None:
+ return None
+ ELLIPSES = '...'
+ if len(s) > length:
+ return s[:length - len(ELLIPSES)] + ELLIPSES
+ return s
+
+
+def version_tuple(v):
+ return tuple(int(e) for e in re.split(r'[-.]', v))
+
+
+def is_outdated_version(version, limit, assume_new=True):
+ if not version:
+ return not assume_new
+ try:
+ return version_tuple(version) < version_tuple(limit)
+ except ValueError:
+ return not assume_new
+
+
+def ytdl_is_updateable():
+ """ Returns if youtube-dl can be updated with -U """
+ from zipimport import zipimporter
+
+ return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+ # Get a short string representation for a subprocess command
+ return ' '.join(shlex_quote(a) for a in args)
+
+
+def mimetype2ext(mt):
+ _, _, res = mt.rpartition('/')
+
+ return {
+ 'x-ms-wmv': 'wmv',
+ 'x-mp4-fragmented': 'mp4',
+ }.get(res, res)
+
+
+def urlhandle_detect_ext(url_handle):
+ try:
+ url_handle.headers
+ getheader = lambda h: url_handle.headers[h]
+ except AttributeError: # Python < 3
+ getheader = url_handle.info().getheader
+
+ cd = getheader('Content-Disposition')
+ if cd:
+ m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+ if m:
+ e = determine_ext(m.group('filename'), default_ext=None)
+ if e:
+ return e
+
+ return mimetype2ext(getheader('Content-Type'))
+
+
+def age_restricted(content_limit, age_limit):
+ """ Returns True iff the content should be blocked """
+
+ if age_limit is None: # No limit set
+ return False
+ if content_limit is None:
+ return False # Content available for everyone
+ return age_limit < content_limit
+
+
+def is_html(first_bytes):
+ """ Detect whether a file contains HTML by examining its first bytes. """
+
+ BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+ ]
+ for bom, enc in BOMS:
+ if first_bytes.startswith(bom):
+ s = first_bytes[len(bom):].decode(enc, 'replace')
+ break
+ else:
+ s = first_bytes.decode('utf-8', 'replace')
+
+ return re.match(r'^\s*<', s)
+
+
+def determine_protocol(info_dict):
+ protocol = info_dict.get('protocol')
+ if protocol is not None:
+ return protocol
+
+ url = info_dict['url']
+ if url.startswith('rtmp'):
+ return 'rtmp'
+ elif url.startswith('mms'):
+ return 'mms'
+ elif url.startswith('rtsp'):
+ return 'rtsp'
+
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ return 'm3u8'
+ elif ext == 'f4m':
+ return 'f4m'
+
+ return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+ """ Render a list of rows, each as a list of values """
+ table = [header_row] + data
+ max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+ format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+ return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ if m.group('strval') is not None:
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('strval')
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ actual_value = dct.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: v is not None,
+ '!': lambda v: v is None,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
+
+ return all(
+ _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+
+
+def match_filter_func(filter_str):
+ def _match_func(info_dict):
+ if match_str(filter_str, info_dict):
+ return None
+ else:
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
+ return _match_func