# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import calendar
import codecs
import contextlib
import ctypes
import datetime
import email.utils
import errno
import functools
import gzip
import io
import itertools
import json
import locale
import math
import os
import pipes
import platform
import re
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_basestring,
    compat_chr,
    compat_getenv,
    compat_html_entities,
    compat_http_client,
    compat_parse_qs,
    compat_socket_create_connection,
    compat_str,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
    shlex_quote,
)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non-ASCII characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args['mode'] = 'w'
        args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        expr = xpath + "[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None


# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
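
# Usage sketch for xpath_with_ns: every 'prefix:tag' step in the path is
# rewritten to ElementTree's '{namespace-uri}tag' form. The namespace map
# below is a made-up example, not something defined by this module:
#
#   xpath_with_ns('ns:videos/ns:video', {'ns': 'http://example.com/ns'})
#   # -> '{http://example.com/ns}videos/{http://example.com/ns}video'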


def xpath_text(node, xpath, name=None, fatal=False):
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    n = node.find(xpath)
    if n is None or n.text is None:
        if fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n.text


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute("id", id, html)


def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    m = re.search(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value)), html)

    if not m:
        return None
    res = m.group('content')

    if res.startswith('"') or res.startswith("'"):
        res = res[1:-1]

    return unescapeHTML(res)


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
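
# Usage sketch for clean_html: tags are stripped, <br> and </p><p> become
# newlines, and entities are decoded (value below is indicative):
#
#   clean_html('<p>a &amp; b<br/>c</p>')
#   # -> 'a & b\nc'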


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = os.path.join(*(
            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
            for path_part in os.path.split(filename)))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible.
    """
    def replace_insane(char):
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps like 0:12:34 by turning the colons into underscores
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
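
# Usage sketch for sanitize_filename (values are indicative):
#
#   sanitize_filename('New World record at 0:12:34')
#   # -> 'New World record at 0_12_34'
#   sanitize_filename('aäb中国的c', restricted=True)
#   # -> roughly 'a_b_c' (non-ASCII replaced by '_' and runs collapsed)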


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character."""
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x?[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)


def unescapeHTML(s):
    if s is None:
        return s
    assert type(s) == compat_str

    return re.sub(
        r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        if not for_subprocess:
            return s
        else:
            # For subprocess calls, encode with locale encoding
            # Refer to http://stackoverflow.com/a/9951851/35070
            encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


def formatSeconds(secs):
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs


def make_HTTPS_handler(params, **kwargs):
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            if ytdl_is_updateable():
                update_cmd = 'type  youtube-dl -U  to update'
            else:
                update_cmd = 'see  https://yt-dl.org/update  on how to update'
            msg += '; please report this issue on https://yt-dl.org/bug .'
            msg += ' Make sure you are using the latest version; %s.' % update_cmd
            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass


class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg


class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass


class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')
    if source_address is not None:
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = compat_socket_create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname
        return self.do_open(functools.partial(
            _create_http_connection, self, self._https_conn_class, True),
            req, **kwargs)


def parse_iso8601(date_str, delimiter='T'):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    m = re.search(
        r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group(0))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    dt = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(dt.timetuple())
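
# Usage sketch for parse_iso8601: fractional seconds and the trailing 'Z' or
# '+HH:MM' offset are stripped before strptime, and the offset is applied by
# hand. Indicative example:
#
#   parse_iso8601('2014-03-23T23:04:26+0100')
#   # -> 1395612266  (i.e. 2014-03-23 22:04:26 UTC)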


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
    ]
    if day_first:
        format_expressions.extend([
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
        ])
    else:
        format_expressions.extend([
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
        ])
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
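
# Usage sketch for unified_strdate (values are indicative and depend on the
# format expressions listed above):
#
#   unified_strdate('8/7/2009')       # -> '20090708' (day first by default)
#   unified_strdate('Dec 14, 2012')   # -> '20121214'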


def determine_ext(url, default_ext='unknown_video'):
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext


def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format


def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()


def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        return date_str


class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
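
# Usage sketch for DateRange (arguments use the formats accepted by
# date_from_str; values are indicative):
#
#   '20141224' in DateRange('20141201', '20150101')   # -> True
#   DateRange.day('20150110')                          # only that one day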


def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res


def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes.wintypes

    WIN_OUTPUT_IDS = {
        1: -11,  # STD_OUTPUT_HANDLE
        2: -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True


def write_string(s, out=None, encoding=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()


def bytes_to_intlist(bs):
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]


def intlist_to_bytes(xs):
    if not xs:
        return b''
    return struct_pack('%dB' % len(xs), *xs)


# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
else:
    import fcntl

    def _lock_file(f, exclusive):
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)


class locked_file(object):
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
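
# Usage sketch for locked_file: the context manager takes an exclusive lock
# for 'a'/'w' modes and a shared lock for 'r', and always closes the file on
# exit. The filename is a made-up example:
#
#   with locked_file('archive.txt', 'a', encoding='utf-8') as f:
#       f.write('some record\n')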


def get_filesystem_encoding():
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'


def shell_quote(args):
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return ' '.join(quoted_args)


def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also include the first element
        for which pred(e) is false """
    for e in seq:
        yield e
        if not pred(e):
            return


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    sdata = compat_urllib_parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data


def format_bytes(bytes):
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)


def parse_filesize(s):
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
    }

    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if not m:
        return None

    num_str = m.group('num').replace(',', '.')
    mult = _UNIT_TABLE[m.group('unit')]
    return int(float(num_str) * mult)
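
# Usage sketch (values are indicative; parse_filesize depends on the unit
# table above):
#
#   format_bytes(1024)        # -> '1.00KiB'
#   format_bytes(1536)        # -> '1.50KiB'
#   parse_filesize('2 MiB')   # -> 2097152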


def get_term_width():
    columns = compat_getenv('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        return None


def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """

    ENGLISH_NAMES = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
    except ValueError:
        return None


def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)


def setproctitle(title):
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this


def remove_start(s, start):
    if s.startswith(start):
        return s[len(start):]
    return s


def remove_end(s, end):
    if s.endswith(end):
        return s[:-len(end)]
    return s


def url_basename(url):
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]


class HEADRequest(compat_urllib_request.Request):
    def get_method(self):
        return "HEAD"


def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    return default if v is None else (int(v) * invscale // scale)


def str_or_none(v, default=None):
    return default if v is None else compat_str(v)


def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    int_str = re.sub(r'[,\.\+]', '', int_str)
    return int(int_str)


def float_or_none(v, scale=1, invscale=1, default=None):
    return default if v is None else (float(v) * invscale / scale)


def parse_duration(s):
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)T?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None
    res = 0
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    if m.group('secs'):
        res += int(m.group('secs'))
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
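
# Usage sketch for parse_duration (values are indicative):
#
#   parse_duration('9:54')       # -> 594
#   parse_duration('01:02:03')   # -> 3723
#   parse_duration('3 min')      # -> 180.0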


def prepend_extension(filename, ext):
    name, real_ext = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(name, ext, real_ext)


def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe


def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)


def detect_exe_version(output, version_re=None, unrecognized='present'):
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    else:
        return unrecognized


class PagedList(object):
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())


class OnDemandPagedList(PagedList):
    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
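
# Usage sketch for OnDemandPagedList: pagefunc(pagenum) must yield the items
# of zero-based page `pagenum`; pages are only fetched when getslice() needs
# them. The pager below is a made-up stand-in for a real API call:
#
#   def _fetch_page(pagenum):
#       return range(pagenum * 10, (pagenum + 1) * 10)
#
#   pl = OnDemandPagedList(_fetch_page, 10)
#   pl.getslice(0, 25)   # -> [0, 1, ..., 24], fetching only pages 0-2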


class InAdvancePagedList(PagedList):
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res


def uppercase_escape(s):
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")


def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()


try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack


def read_batch_urls(batch_fd):
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')


try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')


def parse_xml(s):
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
    return tree


US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)


def strip_jsonp(code):
    return re.sub(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)


def js_to_json(code):
    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            return v
        if v.startswith("'"):
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
    return res
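
# Usage sketch for js_to_json (values are indicative):
#
#   js_to_json("{'abc': true}")      # -> '{"abc": true}'
#   js_to_json('["abc", "def",]')    # -> '["abc", "def"]'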


def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q


DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'


def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s


def version_tuple(v):
    return tuple(int(e) for e in re.split(r'[-.]', v))


def is_outdated_version(version, limit, assume_new=True):
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new


def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')


def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(shlex_quote(a) for a in args)


def urlhandle_detect_ext(url_handle):
    try:
        url_handle.headers
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return getheader('Content-Type').split("/")[1]


def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)


def determine_protocol(info_dict):
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme


def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
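
# Usage sketch for render_table: columns are left-aligned and padded to the
# widest cell plus one space (output is indicative):
#
#   print(render_table(
#       ['format', 'note'],
#       [['22', 'hd720'], ['18', 'mp4 360p']]))
#   # format note
#   # 22     hd720
#   # 18     mp4 360p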