# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import calendar
import codecs
import contextlib
import ctypes
import datetime
import email.utils
import errno
import functools
import gzip
import io
import itertools
import json
import locale
import math
import operator
import os
import pipes
import platform
import re
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_basestring,
    compat_chr,
    compat_getenv,
    compat_html_entities,
    compat_http_client,
    compat_parse_qs,
    compat_socket_create_connection,
    compat_str,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
    shlex_quote,
)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non-ASCII characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args['mode'] = 'w'
        args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
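
# Usage sketch (illustrative; the file name and dict below are made up for the
# example): write_json_file() writes to a temporary file in the same directory
# and only os.rename()s it over the target once json.dump() has finished, so
# readers never observe a half-written JSON document.
#
#   write_json_file({'id': 'abc', 'title': 'Example video'}, 'example.info.json')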


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        expr = xpath + "[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None


# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


def xpath_text(node, xpath, name=None, fatal=False):
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    n = node.find(xpath)
    if n is None or n.text is None:
        if fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n.text
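
# Usage sketch (illustrative; the namespace URI and element names are made up):
#   doc = xml.etree.ElementTree.fromstring(
#       '<root xmlns:ns="http://example.com/ns"><ns:item id="a">text</ns:item></root>')
#   path = xpath_with_ns('./ns:item', {'ns': 'http://example.com/ns'})
#   xpath_text(doc, path)                   # should return 'text'
#   find_xpath_attr(doc, path, 'id', 'a')   # should return the matching Element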


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute("id", id, html)


def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    m = re.search(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value)), html)

    if not m:
        return None
    res = m.group('content')

    res = res.strip()
    if res.startswith('"') or res.startswith("'"):
        res = res[1:-1]

    return unescapeHTML(res)


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
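
# Usage sketch (illustrative snippet):
#   clean_html('<p>Hello<br/>world</p>')   # should return 'Hello\nworld'
# <br> tags become newlines, all other tags are stripped and HTML entities
# are resolved via unescapeHTML().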


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = os.path.join(*(
            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
            for path_part in os.path.split(filename)
        ))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps like 12:34:56 before the per-character replacements
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        if not result:
            result = '_'
    return result
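
# Usage sketch (illustrative titles):
#   sanitize_filename('Foo: bar/baz?')                   # should give 'Foo - bar_baz'
#   sanitize_filename('Foo: bar/baz?', restricted=True)  # should give 'Foo_-_bar_baz'
# In restricted mode spaces and most punctuation collapse to '_', while the
# default mode only replaces characters that are unsafe in file names.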


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character."""
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x?[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
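
# Usage sketch (illustrative):
#   unescapeHTML('&amp;&#39;&#x41;')   # should return "&'A"
# Named entities are looked up in compat_html_entities.name2codepoint and
# numeric references are decoded via _htmlentity_transform().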


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        if not for_subprocess:
            return s
        else:
            # For subprocess calls, encode with locale encoding
            # Refer to http://stackoverflow.com/a/9951851/35070
            encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


def formatSeconds(secs):
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
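
# Usage sketch (illustrative values):
#   formatSeconds(3750)  # -> '1:02:30'
#   formatSeconds(75)    # -> '1:15'
#   formatSeconds(42)    # -> '42'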


def make_HTTPS_handler(params, **kwargs):
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            if ytdl_is_updateable():
                update_cmd = 'type  youtube-dl -U  to update'
            else:
                update_cmd = 'see  https://yt-dl.org/update  on how to update'
            msg += '; please report this issue on https://yt-dl.org/bug .'
            msg += ' Make sure you are using the latest version; %s.' % update_cmd
            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass


class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg


class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass


class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')
    if source_address is not None:
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = compat_socket_create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname
        return self.do_open(functools.partial(
            _create_http_connection, self, self._https_conn_class, True),
            req, **kwargs)
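
# Usage sketch (illustrative): both handlers are meant to be installed into a
# standard OpenerDirector; `params` is the youtube-dl options dict they read
# (e.g. 'nocheckcertificate', 'source_address'):
#
#   opener = compat_urllib_request.build_opener(
#       YoutubeDLHandler(params), make_HTTPS_handler(params))
#   opener.open('http://example.com/')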


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    if timezone is None:
        m = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if not m:
            timezone = datetime.timedelta()
        else:
            date_str = date_str[:-len(m.group(0))]
            if not m.group('sign'):
                timezone = datetime.timedelta()
            else:
                sign = 1 if m.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=sign * int(m.group('hours')),
                    minutes=sign * int(m.group('minutes')))
    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    dt = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(dt.timetuple())
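
# Usage sketch (illustrative timestamp):
#   parse_iso8601('2015-02-10T15:30:00+01:00')   # should return 1423578600
# The trailing offset is stripped, converted into a timedelta and subtracted
# before the naive datetime is turned into a UTC Unix timestamp.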


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S.%f',
    ]
    if day_first:
        format_expressions.extend([
            '%d.%m.%Y',
            '%d/%m/%Y',
        ])
    else:
        format_expressions.extend([
            '%m.%d.%Y',
            '%m/%d/%Y',
        ])
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
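
# Usage sketch (illustrative date string):
#   unified_strdate('2014-12-25T18:30:00Z')   # should return '20141225'
# Each expression in format_expressions is tried in turn; as a last resort the
# RFC 2822 parser from email.utils is used.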


def determine_ext(url, default_ext='unknown_video'):
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext


def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format


def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()


def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        return date_str


class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s", the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
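
# Usage sketch (illustrative dates):
#   datetime.date(2015, 2, 14) in DateRange('20150101', '20150301')   # -> True
#   '20150401' in DateRange('20150101', '20150301')                   # -> False
# Strings are converted with date_from_str(), so relative forms such as
# 'now-1week' are accepted as range bounds too.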


def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res


def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes.wintypes

    WIN_OUTPUT_IDS = {
        1: -11,  # STD_OUTPUT_HANDLE
        2: -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True


def write_string(s, out=None, encoding=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
): 
 958     if isinstance(bs
[0], int):  # Python 3 
 961         return [ord(c
) for c 
in bs
] 
 964 def intlist_to_bytes(xs
): 
 967     return struct_pack('%dB' % len(xs
), *xs
) 


# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)


class locked_file(object):
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)


def get_filesystem_encoding():
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'


def shell_quote(args):
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return ' '.join(quoted_args)


def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element e for which pred(e) is false) """
    for e in seq:
        yield e
        if not pred(e):
            return


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    sdata = compat_urllib_parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
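
# Usage sketch (URL and payload are made up for the example):
#   url = smuggle_url('http://example.com/video', {'referrer': 'http://example.com/'})
#   unsmuggle_url(url)
#   # -> ('http://example.com/video', {'referrer': 'http://example.com/'})
# The payload is JSON-encoded and carried in the URL fragment, so it round-trips
# without affecting the actual request.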


def format_bytes(bytes):
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
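
# Usage sketch:
#   format_bytes(1536)      # -> '1.50KiB'
#   format_bytes(10 ** 9)   # -> '953.67MiB'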


def parse_filesize(s):
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
    }

    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if not m:
        return None

    num_str = m.group('num').replace(',', '.')
    mult = _UNIT_TABLE[m.group('unit')]
    return int(float(num_str) * mult)
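
# Usage sketch:
#   parse_filesize('1.5MiB')   # should return 1572864
#   parse_filesize('500 KB')   # should return 500000
# Decimal commas are accepted as well ('1,5MiB' is read as 1.5 MiB).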


def get_term_width():
    columns = compat_getenv('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        return None


def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """

    try:
        return ENGLISH_MONTH_NAMES.index(name) + 1
    except ValueError:
        return None


def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviation """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None


def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
): 
1220     assert isinstance(title
, compat_str
) 
1222         libc 
= ctypes
.cdll
.LoadLibrary("libc.so.6") 
1225     title_bytes 
= title
.encode('utf-8') 
1226     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1227     buf
.value 
= title_bytes
 
1229         libc
.prctl(15, buf
, 0, 0, 0) 
1230     except AttributeError: 
1231         return  # Strange libc, just skip this 


def remove_start(s, start):
    if s.startswith(start):
        return s[len(start):]
    return s


def remove_end(s, end):
    if s.endswith(end):
        return s[:-len(end)]
    return s


def url_basename(url):
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]


class HEADRequest(compat_urllib_request.Request):
    def get_method(self):
        return "HEAD"


def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    return default if v is None else (int(v) * invscale // scale)


def str_or_none(v, default=None):
    return default if v is None else compat_str(v)


def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    int_str = re.sub(r'[,\.\+]', '', int_str)
    return int(int_str)


def float_or_none(v, scale=1, invscale=1, default=None):
    return default if v is None else (float(v) * invscale / scale)
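
# Usage sketch:
#   int_or_none('48000', scale=1000)    # -> 48
#   str_to_int('1,000,000')             # -> 1000000
#   float_or_none('2.5', invscale=60)   # -> 150.0
# All of these return the given default (None unless overridden) for None input.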


def parse_duration(s):
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)T?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None
    res = 0
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    if m.group('secs'):
        res += int(m.group('secs'))
    if m.group('mins_reversed'):
        res += int(m.group('mins_reversed')) * 60
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('hours_reversed'):
        res += int(m.group('hours_reversed')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
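
# Usage sketch:
#   parse_duration('1:02:30')   # -> 3750
#   parse_duration('3h11m53s')  # -> 11513
#   parse_duration('5 min')     # -> 300.0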


def prepend_extension(filename, ext):
    name, real_ext = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(name, ext, real_ext)


def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe


def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)


def detect_exe_version(output, version_re=None, unrecognized='present'):
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    else:
        return unrecognized


class PagedList(object):
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())


class OnDemandPagedList(PagedList):
    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res


class InAdvancePagedList(PagedList):
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
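
# Usage sketch (the page function below is made up for the example):
#   def fetch_page(pagenum):
#       return range(pagenum * 10, (pagenum + 1) * 10)   # 10 items per "page"
#   pl = OnDemandPagedList(fetch_page, 10)
#   pl.getslice(5, 25)   # -> [5, 6, ..., 24], fetching only pages 0-2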


def uppercase_escape(s):
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")


def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
.pack('!I', 0) 
1474     # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument 
1475     def struct_pack(spec
, *args
): 
1476         if isinstance(spec
, compat_str
): 
1477             spec 
= spec
.encode('ascii') 
1478         return struct
.pack(spec
, *args
) 
1480     def struct_unpack(spec
, *args
): 
1481         if isinstance(spec
, compat_str
): 
1482             spec 
= spec
.encode('ascii') 
1483         return struct
.unpack(spec
, *args
) 
1485     struct_pack 
= struct
.pack
 
1486     struct_unpack 
= struct
.unpack
 


def read_batch_urls(batch_fd):
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')


try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')


def parse_xml(s):
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
    return tree


def parse_age_limit(s):
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)


def strip_jsonp(code):
    return re.sub(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)


def js_to_json(code):
    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            return v
        if v.startswith("'"):
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
    return res
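
# Usage sketch:
#   js_to_json("{foo: 'bar', baz: [1, 2,]}")
#   # -> '{"foo": "bar", "baz": [1, 2]}'  (now parseable by json.loads)
# Bare identifiers and single-quoted strings are re-quoted, and trailing commas
# before ']' are dropped.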


def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q


DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'


def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s


def version_tuple(v):
    return tuple(int(e) for e in re.split(r'[-.]', v))


def is_outdated_version(version, limit, assume_new=True):
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new


def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')


def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(shlex_quote(a) for a in args)


def mimetype2ext(mt):
    _, _, res = mt.rpartition('/')

    return {
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)


def urlhandle_detect_ext(url_handle):
    try:
        url_handle.headers
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))


def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)


def determine_protocol(info_dict):
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme


def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)


def _match_one(filter_part, dct):
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)


def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False """

    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
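
# Usage sketch (field names follow youtube-dl's info dict conventions):
#   match_str('like_count > 100 & dislike_count <? 50',
#             {'like_count': 190, 'dislike_count': 10})   # -> True
# '&' separates sub-filters; a trailing '?' after an operator also accepts
# entries where the field is missing.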


def match_filter_func(filter_str):
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func