4 from __future__ 
import unicode_literals
 
  34 import xml
.etree
.ElementTree
 
  38     compat_HTMLParseError
, 
  42     compat_ctypes_WINFUNCTYPE
, 
  43     compat_etree_fromstring
, 
  46     compat_html_entities_html5
, 
  52     compat_socket_create_connection
, 
  58     compat_urllib_parse_urlencode
, 
  59     compat_urllib_parse_urlparse
, 
  60     compat_urllib_parse_unquote_plus
, 
  61     compat_urllib_request
, 
  72 def register_socks_protocols(): 
  73     # "Register" SOCKS protocols 
  74     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  75     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  76     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  77         if scheme 
not in compat_urlparse
.uses_netloc
: 
  78             compat_urlparse
.uses_netloc
.append(scheme
) 
# This is not clearly defined otherwise
# (the type object of a compiled regular-expression pattern)
compiled_regex_type = type(re.compile(''))
  85     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)', 
  86     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  87     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  88     'Accept-Encoding': 'gzip, deflate', 
  89     'Accept-Language': 'en-us,en;q=0.5', 
  94     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
# English month names; also used as the 'en' entry of MONTH_NAMES below
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
 105     'en': ENGLISH_MONTH_NAMES
, 
 107         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
 108         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 112     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 113     'flv', 'f4v', 'f4a', 'f4b', 
 114     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 115     'mkv', 'mka', 'mk3d', 
 124     'f4f', 'f4m', 'm3u8', 'smil') 
# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration: bare strings in
# the chain() contribute one replacement per character, while bracketed list
# entries like ['AE'] contribute a single multi-character replacement.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
 152     '%Y-%m-%d %H:%M:%S.%f', 
 155     '%Y-%m-%dT%H:%M:%SZ', 
 156     '%Y-%m-%dT%H:%M:%S.%fZ', 
 157     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 159     '%Y-%m-%dT%H:%M:%S.%f', 
 162     '%b %d %Y at %H:%M:%S', 
 164     '%B %d %Y at %H:%M:%S', 
 167 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 168 DATE_FORMATS_DAY_FIRST
.extend([ 
 177 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 178 DATE_FORMATS_MONTH_FIRST
.extend([ 
# NOTE(review): appears to match "p.a.c.k.e.r."-style packed JavaScript
# payloads (eval(function(p,a,c,k,e,...)) obfuscation) — confirm against the
# callers that use this pattern
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 189 def preferredencoding(): 
 190     """Get preferred encoding. 
 192     Returns the best encoding scheme for the system, based on 
 193     locale.getpreferredencoding() and some further tweaks. 
 196         pref 
= locale
.getpreferredencoding() 
 204 def write_json_file(obj
, fn
): 
 205     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 207     fn 
= encodeFilename(fn
) 
 208     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 209         encoding 
= get_filesystem_encoding() 
 210         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 211         # will fail if the filename contains non ascii characters unless we 
 212         # use a unicode object 
 213         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 214         # the same for os.path.dirname 
 215         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 217         path_basename 
= os
.path
.basename
 
 218         path_dirname 
= os
.path
.dirname
 
 222         'prefix': path_basename(fn
) + '.', 
 223         'dir': path_dirname(fn
), 
 227     # In Python 2.x, json.dump expects a bytestream. 
 228     # In Python 3.x, it writes to a character stream 
 229     if sys
.version_info 
< (3, 0): 
 237     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 242         if sys
.platform 
== 'win32': 
 243             # Need to remove existing file on Windows, else os.rename raises 
 244             # WindowsError or FileExistsError. 
 249         os
.rename(tf
.name
, fn
) 
 258 if sys
.version_info 
>= (2, 7): 
 259     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 260         """ Find the xpath xpath[@key=val] """ 
 261         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 262         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 263         return node
.find(expr
) 
 265     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 266         for f 
in node
.findall(compat_xpath(xpath
)): 
 267             if key 
not in f
.attrib
: 
 269             if val 
is None or f
.attrib
.get(key
) == val
: 
 273 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 274 # the namespace parameter 
 277 def xpath_with_ns(path
, ns_map
): 
 278     components 
= [c
.split(':') for c 
in path
.split('/')] 
 282             replaced
.append(c
[0]) 
 285             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 286     return '/'.join(replaced
) 
 289 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 290     def _find_xpath(xpath
): 
 291         return node
.find(compat_xpath(xpath
)) 
 293     if isinstance(xpath
, (str, compat_str
)): 
 294         n 
= _find_xpath(xpath
) 
 302         if default 
is not NO_DEFAULT
: 
 305             name 
= xpath 
if name 
is None else name
 
 306             raise ExtractorError('Could not find XML element %s' % name
) 
 312 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 313     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 314     if n 
is None or n 
== default
: 
 317         if default 
is not NO_DEFAULT
: 
 320             name 
= xpath 
if name 
is None else name
 
 321             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 327 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 328     n 
= find_xpath_attr(node
, xpath
, key
) 
 330         if default 
is not NO_DEFAULT
: 
 333             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 334             raise ExtractorError('Could not find XML attribute %s' % name
) 
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # NOTE: `id` shadows the builtin of the same name; kept for backward compatibility
    return get_element_by_attribute('id', id, html)
 345 def get_element_by_class(class_name
, html
): 
 346     """Return the content of the first tag with the specified class in the passed HTML document""" 
 347     retval 
= get_elements_by_class(class_name
, html
) 
 348     return retval
[0] if retval 
else None 
 351 def get_element_by_attribute(attribute
, value
, html
, escape_value
=True): 
 352     retval 
= get_elements_by_attribute(attribute
, value
, html
, escape_value
) 
 353     return retval
[0] if retval 
else None 
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # A class attribute may hold several whitespace-separated names, so match
    # class_name as a whole word anywhere inside the (already-quoted) value.
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
 363 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
 364     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 366     value = re.escape(value) if escape_value else value 
 369     for m in re.finditer(r'''(?xs) 
 371          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
 
 373          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*? 
 377     ''' % (re.escape(attribute), value), html): 
 378         res = m.group('content
') 
 380         if res.startswith('"') or res.startswith("'"): 
 383         retlist.append(unescapeHTML(res)) 
 388 class HTMLAttributeParser(compat_HTMLParser): 
 389     """Trivial HTML parser to gather the attributes for a single element""" 
 392         compat_HTMLParser.__init__(self) 
 394     def handle_starttag(self, tag, attrs): 
 395         self.attrs = dict(attrs) 
 398 def extract_attributes(html_element): 
 399     """Given a string for an HTML element such as 
 401          a="foo" B="bar" c="&98;az" d=boz 
 402          empty= noval entity="&" 
 405     Decode and return a dictionary of attributes. 
 407         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 408         'empty
': '', 'noval
': None, 'entity
': '&', 
 409         'sq
': '"', 'dq': '\'' 
 411     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 412     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 414     parser = HTMLAttributeParser() 
 416         parser.feed(html_element) 
 418     # Older Python may throw HTMLParseError in case of malformed HTML 
 419     except compat_HTMLParseError: 
 424 def clean_html(html): 
 425     """Clean an HTML snippet into a readable string""" 
 427     if html is None:  # Convenience for sanitizing descriptions etc. 
 431     html = html.replace('\n', ' ') 
 432     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 433     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 435     html = re.sub('<.*?>', '', html) 
 436     # Replace html entities 
 437     html = unescapeHTML(html) 
 441 def sanitize_open(filename, open_mode): 
 442     """Try to open the given filename, and slightly tweak it if this fails. 
 444     Attempts to open the given filename. If this fails, it tries to change 
 445     the filename slightly, step by step, until it's either able to open it 
 446     or it fails and raises a final exception, like the standard open() 
 449     It returns the tuple (stream, definitive_file_name). 
 453             if sys.platform == 'win32': 
 455                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 456             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 457         stream = open(encodeFilename(filename), open_mode) 
 458         return (stream, filename) 
 459     except (IOError, OSError) as err: 
 460         if err.errno in (errno.EACCES,): 
 463         # In case of error, try to remove win32 forbidden chars 
 464         alt_filename = sanitize_path(filename) 
 465         if alt_filename == filename: 
 468             # An exception here should be caught in the caller 
 469             stream = open(encodeFilename(alt_filename), open_mode) 
 470             return (stream, alt_filename) 
 473 def timeconvert(timestr): 
 474     """Convert RFC 2822 defined time string into system timestamp""" 
 476     timetuple = email.utils.parsedate_tz(timestr) 
 477     if timetuple is not None: 
 478         timestamp = email.utils.mktime_tz(timetuple) 
 482 def sanitize_filename(s, restricted=False, is_id=False): 
 483     """Sanitizes a string so it could be used as part of a filename. 
 484     If restricted is set, use a stricter subset of allowed characters. 
 485     Set is_id if this is not an arbitrary string, but an ID that should be kept 
 488     def replace_insane(char): 
 489         if restricted and char in ACCENT_CHARS: 
 490             return ACCENT_CHARS[char] 
 491         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 494             return '' if restricted else '\'' 
 496             return '_
-' if restricted else ' -' 
 497         elif char in '\\/|
*<>': 
 499         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 501         if restricted 
and ord(char
) > 127: 
 506     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 507     result 
= ''.join(map(replace_insane
, s
)) 
 509         while '__' in result
: 
 510             result 
= result
.replace('__', '_') 
 511         result 
= result
.strip('_') 
 512         # Common case of "Foreign band name - English song title" 
 513         if restricted 
and result
.startswith('-_'): 
 515         if result
.startswith('-'): 
 516             result 
= '_' + result
[len('-'):] 
 517         result 
= result
.lstrip('.') 
 523 def sanitize_path(s
): 
 524     """Sanitizes and normalizes path on Windows""" 
 525     if sys
.platform 
!= 'win32': 
 527     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 528     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 529         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 530     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 534         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
 535         for path_part 
in norm_path
] 
 537         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 538     return os
.path
.join(*sanitized_path
) 
 541 def sanitize_url(url
): 
 542     # Prepend protocol-less URLs with `http:` scheme in order to mitigate 
 543     # the number of unwanted failures due to missing protocol 
 544     if url
.startswith('//'): 
 545         return 'http:%s' % url
 
 546     # Fix some common typos seen so far 
 548         # https://github.com/rg3/youtube-dl/issues/15649 
 549         (r
'^httpss://', r
'https://'), 
 550         # https://bx1.be/lives/direct-tv/ 
 551         (r
'^rmtp([es]?)://', r
'rtmp\1://'), 
 553     for mistake
, fixup 
in COMMON_TYPOS
: 
 554         if re
.match(mistake
, url
): 
 555             return re
.sub(mistake
, fixup
, url
) 
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request whose URL has first been cleaned by sanitize_url()."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 564     """Expand shell variables and ~""" 
 565     return os
.path
.expandvars(compat_expanduser(s
)) 
 568 def orderedSet(iterable
): 
 569     """ Remove all duplicates from the input iterable """ 
 577 def _htmlentity_transform(entity_with_semicolon
): 
 578     """Transforms an HTML entity to a character.""" 
 579     entity 
= entity_with_semicolon
[:-1] 
 581     # Known non-numeric HTML entity 
 582     if entity 
in compat_html_entities
.name2codepoint
: 
 583         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 585     # TODO: HTML5 allows entities without a semicolon. For example, 
 586     # 'Éric' should be decoded as 'Éric'. 
 587     if entity_with_semicolon 
in compat_html_entities_html5
: 
 588         return compat_html_entities_html5
[entity_with_semicolon
] 
 590     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 592         numstr 
= mobj
.group(1) 
 593         if numstr
.startswith('x'): 
 595             numstr 
= '0%s' % numstr
 
 598         # See https://github.com/rg3/youtube-dl/issues/7518 
 600             return compat_chr(int(numstr
, base
)) 
 604     # Unknown entity in name, return its literal representation 
 605     return '&%s;' % entity
 
 611     assert type(s
) == compat_str
 
 614         r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 617 def get_subprocess_encoding(): 
 618     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 619         # For subprocess calls, encode with locale encoding 
 620         # Refer to http://stackoverflow.com/a/9951851/35070 
 621         encoding 
= preferredencoding() 
 623         encoding 
= sys
.getfilesystemencoding() 
 629 def encodeFilename(s
, for_subprocess
=False): 
 631     @param s The name of the file 
 634     assert type(s
) == compat_str
 
 636     # Python 3 has a Unicode API 
 637     if sys
.version_info 
>= (3, 0): 
 640     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 641     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 642     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 643     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 646     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 647     if sys
.platform
.startswith('java'): 
 650     return s
.encode(get_subprocess_encoding(), 'ignore') 
 653 def decodeFilename(b
, for_subprocess
=False): 
 655     if sys
.version_info 
>= (3, 0): 
 658     if not isinstance(b
, bytes): 
 661     return b
.decode(get_subprocess_encoding(), 'ignore') 
 664 def encodeArgument(s
): 
 665     if not isinstance(s
, compat_str
): 
 666         # Legacy code that uses byte strings 
 667         # Uncomment the following line after fixing all post processors 
 668         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 669         s 
= s
.decode('ascii') 
 670     return encodeFilename(s
, True) 
def decodeArgument(b):
    # Thin wrapper over decodeFilename() with for_subprocess=True
    # (a no-op on Python 3 — see decodeFilename's version check)
    return decodeFilename(b, True)
 677 def decodeOption(optval
): 
 680     if isinstance(optval
, bytes): 
 681         optval 
= optval
.decode(preferredencoding()) 
 683     assert isinstance(optval
, compat_str
) 
 687 def formatSeconds(secs
): 
 689         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 691         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 696 def make_HTTPS_handler(params
, **kwargs
): 
 697     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 698     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 699         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 700         if opts_no_check_certificate
: 
 701             context
.check_hostname 
= False 
 702             context
.verify_mode 
= ssl
.CERT_NONE
 
 704             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 707             # (create_default_context present but HTTPSHandler has no context=) 
 710     if sys
.version_info 
< (3, 2): 
 711         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 713         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 714         context
.verify_mode 
= (ssl
.CERT_NONE
 
 715                                if opts_no_check_certificate
 
 716                                else ssl
.CERT_REQUIRED
) 
 717         context
.set_default_verify_paths() 
 718         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 721 def bug_reports_message(): 
 722     if ytdl_is_updateable(): 
 723         update_cmd 
= 'type  youtube-dl -U  to update' 
 725         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 726     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 727     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 728     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The error classes defined below all derive from this class, so callers
    can catch everything with a single `except YoutubeDLError`.
    """
 737 class ExtractorError(YoutubeDLError
): 
 738     """Error during info extraction.""" 
 740     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 741         """ tb, if given, is the original traceback (so that it can be printed out). 
 742         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 745         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 747         if video_id 
is not None: 
 748             msg 
= video_id 
+ ': ' + msg
 
 750             msg 
+= ' (caused by %r)' % cause
 
 752             msg 
+= bug_reports_message() 
 753         super(ExtractorError
, self
).__init
__(msg
) 
 756         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 758         self
.video_id 
= video_id
 
 760     def format_traceback(self
): 
 761         if self
.traceback 
is None: 
 763         return ''.join(traceback
.format_tb(self
.traceback
)) 
 766 class UnsupportedError(ExtractorError
): 
 767     def __init__(self
, url
): 
 768         super(UnsupportedError
, self
).__init
__( 
 769             'Unsupported URL: %s' % url
, expected
=True) 
 773 class RegexNotFoundError(ExtractorError
): 
 774     """Error when a regex didn't match""" 
 778 class GeoRestrictedError(ExtractorError
): 
 779     """Geographic restriction Error exception. 
 781     This exception may be thrown when a video is not available from your 
 782     geographic location due to geographic restrictions imposed by a website. 
 784     def __init__(self
, msg
, countries
=None): 
 785         super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True) 
 787         self
.countries 
= countries
 
 790 class DownloadError(YoutubeDLError
): 
 791     """Download Error exception. 
 793     This exception may be thrown by FileDownloader objects if they are not 
 794     configured to continue on errors. They will contain the appropriate 
 798     def __init__(self
, msg
, exc_info
=None): 
 799         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ 
 800         super(DownloadError
, self
).__init
__(msg
) 
 801         self
.exc_info 
= exc_info
 
 804 class SameFileError(YoutubeDLError
): 
 805     """Same File exception. 
 807     This exception will be thrown by FileDownloader objects if they detect 
 808     multiple files would have to be downloaded to the same file on disk. 
 813 class PostProcessingError(YoutubeDLError
): 
 814     """Post Processing exception. 
 816     This exception may be raised by PostProcessor's .run() method to 
 817     indicate an error in the postprocessing task. 
 820     def __init__(self
, msg
): 
 821         super(PostProcessingError
, self
).__init
__(msg
) 
 825 class MaxDownloadsReached(YoutubeDLError
): 
 826     """ --max-downloads limit has been reached. """ 
 830 class UnavailableVideoError(YoutubeDLError
): 
 831     """Unavailable Format exception. 
 833     This exception will be thrown when a video is requested 
 834     in a format that is not available for that video. 
 839 class ContentTooShortError(YoutubeDLError
): 
 840     """Content Too Short exception. 
 842     This exception may be raised by FileDownloader objects when a file they 
 843     download is too small for what the server announced first, indicating 
 844     the connection was probably interrupted. 
 847     def __init__(self
, downloaded
, expected
): 
 848         super(ContentTooShortError
, self
).__init
__( 
 849             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
) 
 852         self
.downloaded 
= downloaded
 
 853         self
.expected 
= expected
 
 856 class XAttrMetadataError(YoutubeDLError
): 
 857     def __init__(self
, code
=None, msg
='Unknown error'): 
 858         super(XAttrMetadataError
, self
).__init
__(msg
) 
 862         # Parsing code and msg 
 863         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 864                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 865             self
.reason 
= 'NO_SPACE' 
 866         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 867             self
.reason 
= 'VALUE_TOO_LONG' 
 869             self
.reason 
= 'NOT_SUPPORTED' 
 872 class XAttrUnavailableError(YoutubeDLError
): 
 876 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 877     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 878     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 879     # https://github.com/rg3/youtube-dl/issues/6727) 
 880     if sys
.version_info 
< (3, 0): 
 881         kwargs
['strict'] = True 
 882     hc 
= http_class(*args
, **compat_kwargs(kwargs
)) 
 883     source_address 
= ydl_handler
._params
.get('source_address') 
 884     if source_address 
is not None: 
 885         sa 
= (source_address
, 0) 
 886         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 887             hc
.source_address 
= sa
 
 889             def _hc_connect(self
, *args
, **kwargs
): 
 890                 sock 
= compat_socket_create_connection( 
 891                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 893                     self
.sock 
= ssl
.wrap_socket( 
 894                         sock
, self
.key_file
, self
.cert_file
, 
 895                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 898             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 903 def handle_youtubedl_headers(headers
): 
 904     filtered_headers 
= headers
 
 906     if 'Youtubedl-no-compression' in filtered_headers
: 
 907         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 908         del filtered_headers
['Youtubedl-no-compression'] 
 910     return filtered_headers
 
 913 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 914     """Handler for HTTP requests and responses. 
 916     This class, when installed with an OpenerDirector, automatically adds 
 917     the standard headers to every HTTP request and handles gzipped and 
 918     deflated responses from web servers. If compression is to be avoided in 
 919     a particular request, the original request in the program code only has 
 920     to include the HTTP header "Youtubedl-no-compression", which will be 
 921     removed before making the real request. 
 923     Part of this code was copied from: 
 925     http://techknack.net/python-urllib2-handlers/ 
 927     Andrew Rowls, the author of that code, agreed to release it to the 
 931     def __init__(self
, params
, *args
, **kwargs
): 
 932         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 933         self
._params 
= params
 
 935     def http_open(self
, req
): 
 936         conn_class 
= compat_http_client
.HTTPConnection
 
 938         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 940             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 941             del req
.headers
['Ytdl-socks-proxy'] 
 943         return self
.do_open(functools
.partial( 
 944             _create_http_connection
, self
, conn_class
, False), 
 950             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 952             return zlib
.decompress(data
) 
 954     def http_request(self
, req
): 
 955         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 956         # always respected by websites, some tend to give out URLs with non percent-encoded 
 957         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 958         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 959         # To work around aforementioned issue we will replace request's original URL with 
 960         # percent-encoded one 
 961         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 962         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 963         url 
= req
.get_full_url() 
 964         url_escaped 
= escape_url(url
) 
 966         # Substitute URL if any change after escaping 
 967         if url 
!= url_escaped
: 
 968             req 
= update_Request(req
, url
=url_escaped
) 
 970         for h
, v 
in std_headers
.items(): 
 971             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 972             # The dict keys are capitalized because of this bug by urllib 
 973             if h
.capitalize() not in req
.headers
: 
 976         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 978         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 979             # Python 2.6 is brain-dead when it comes to fragments 
 980             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 981             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 985     def http_response(self
, req
, resp
): 
 988         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 989             content 
= resp
.read() 
 990             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 992                 uncompressed 
= io
.BytesIO(gz
.read()) 
 993             except IOError as original_ioerror
: 
 994                 # There may be junk add the end of the file 
 995                 # See http://stackoverflow.com/q/4928560/35070 for details 
 996                 for i 
in range(1, 1024): 
 998                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 999                         uncompressed 
= io
.BytesIO(gz
.read()) 
1004                     raise original_ioerror
 
1005             resp 
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
1006             resp
.msg 
= old_resp
.msg
 
1007             del resp
.headers
['Content-encoding'] 
1009         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
1010             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
1011             resp 
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
1012             resp
.msg 
= old_resp
.msg
 
1013             del resp
.headers
['Content-encoding'] 
1014         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
1015         # https://github.com/rg3/youtube-dl/issues/6457). 
1016         if 300 <= resp
.code 
< 400: 
1017             location 
= resp
.headers
.get('Location') 
1019                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
1020                 if sys
.version_info 
>= (3, 0): 
1021                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
1023                     location 
= location
.decode('utf-8') 
1024                 location_escaped 
= escape_url(location
) 
1025                 if location 
!= location_escaped
: 
1026                     del resp
.headers
['Location'] 
1027                     if sys
.version_info 
< (3, 0): 
1028                         location_escaped 
= location_escaped
.encode('utf-8') 
1029                     resp
.headers
['Location'] = location_escaped
 
1032     https_request 
= http_request
 
1033     https_response 
= http_response
 
1036 def make_socks_conn_class(base_class
, socks_proxy
): 
1037     assert issubclass(base_class
, ( 
1038         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
1040     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
1041     if url_components
.scheme
.lower() == 'socks5': 
1042         socks_type 
= ProxyType
.SOCKS5
 
1043     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
1044         socks_type 
= ProxyType
.SOCKS4
 
1045     elif url_components
.scheme
.lower() == 'socks4a': 
1046         socks_type 
= ProxyType
.SOCKS4A
 
1048     def unquote_if_non_empty(s
): 
1051         return compat_urllib_parse_unquote_plus(s
) 
1055         url_components
.hostname
, url_components
.port 
or 1080, 
1057         unquote_if_non_empty(url_components
.username
), 
1058         unquote_if_non_empty(url_components
.password
), 
1061     class SocksConnection(base_class
): 
1063             self
.sock 
= sockssocket() 
1064             self
.sock
.setproxy(*proxy_args
) 
1065             if type(self
.timeout
) in (int, float): 
1066                 self
.sock
.settimeout(self
.timeout
) 
1067             self
.sock
.connect((self
.host
, self
.port
)) 
1069             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1070                 if hasattr(self
, '_context'):  # Python > 2.6 
1071                     self
.sock 
= self
._context
.wrap_socket( 
1072                         self
.sock
, server_hostname
=self
.host
) 
1074                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1076     return SocksConnection
 
1079 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
1080     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
1081         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
1082         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
1083         self
._params 
= params
 
1085     def https_open(self
, req
): 
1087         conn_class 
= self
._https
_conn
_class
 
1089         if hasattr(self
, '_context'):  # python > 2.6 
1090             kwargs
['context'] = self
._context
 
1091         if hasattr(self
, '_check_hostname'):  # python 3.x 
1092             kwargs
['check_hostname'] = self
._check
_hostname
 
1094         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
1096             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1097             del req
.headers
['Ytdl-socks-proxy'] 
1099         return self
.do_open(functools
.partial( 
1100             _create_http_connection
, self
, conn_class
, True), 
1104 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
1105     def __init__(self
, cookiejar
=None): 
1106         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
1108     def http_response(self
, request
, response
): 
1109         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
1110         # characters in Set-Cookie HTTP header of last response (see 
1111         # https://github.com/rg3/youtube-dl/issues/6769). 
1112         # In order to at least prevent crashing we will percent encode Set-Cookie 
1113         # header before HTTPCookieProcessor starts processing it. 
1114         # if sys.version_info < (3, 0) and response.headers: 
1115         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
1116         #         set_cookie = response.headers.get(set_cookie_header) 
1118         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
1119         #             if set_cookie != set_cookie_escaped: 
1120         #                 del response.headers[set_cookie_header] 
1121         #                 response.headers[set_cookie_header] = set_cookie_escaped 
1122         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
1124     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
1125     https_response 
= http_response
 
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (offset, remainder) where offset is a datetime.timedelta
    (zero when no timezone or a literal 'Z' is present) and remainder is
    date_str with the designator removed.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(tz_match.group('tz'))]
    if not tz_match.group('sign'):
        # A bare 'Z' means UTC: zero offset.
        offset = datetime.timedelta()
    else:
        direction = 1 if tz_match.group('sign') == '+' else -1
        offset = datetime.timedelta(
            hours=direction * int(tz_match.group('hours')),
            minutes=direction * int(tz_match.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are not representable by strptime here; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, fmt) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        # Unparseable date: fall through and return None implicitly.
        pass
def date_formats(day_first=True):
    """Return the strptime format list matching the preferred day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822 style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date string into a UNIX timestamp, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # 'PM' shifts the clock forward twelve hours.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media extension from a URL, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Everything after the last '.' of the path part (query string dropped).
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif candidate.rstrip('/') in KNOWN_EXTENSIONS:
        return candidate.rstrip('/')
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive the subtitle file name: <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'  # timedelta keyword is plural
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        # Not an 8-digit date: pass the input through untouched.
        return date_str
    return '-'.join(parts.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Unbounded ends default to the extremes representable by datetime.
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Python 2 may hand back bytes; normalize to text.
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name
1327 def _windows_write_string(s
, out
): 
1328     """ Returns True if the string was written using special methods, 
1329     False if it has yet to be written out.""" 
1330     # Adapted from http://stackoverflow.com/a/3259271/35070 
1333     import ctypes
.wintypes
 
1341         fileno 
= out
.fileno() 
1342     except AttributeError: 
1343         # If the output stream doesn't have a fileno, it's virtual 
1345     except io
.UnsupportedOperation
: 
1346         # Some strange Windows pseudo files? 
1348     if fileno 
not in WIN_OUTPUT_IDS
: 
1351     GetStdHandle 
= compat_ctypes_WINFUNCTYPE( 
1352         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1353         ('GetStdHandle', ctypes
.windll
.kernel32
)) 
1354     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1356     WriteConsoleW 
= compat_ctypes_WINFUNCTYPE( 
1357         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1358         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1359         ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
)) 
1360     written 
= ctypes
.wintypes
.DWORD(0) 
1362     GetFileType 
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
)) 
1363     FILE_TYPE_CHAR 
= 0x0002 
1364     FILE_TYPE_REMOTE 
= 0x8000 
1365     GetConsoleMode 
= compat_ctypes_WINFUNCTYPE( 
1366         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1367         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1368         ('GetConsoleMode', ctypes
.windll
.kernel32
)) 
1369     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1371     def not_a_console(handle
): 
1372         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1374         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1375                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1377     if not_a_console(h
): 
1380     def next_nonbmp_pos(s
): 
1382             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1383         except StopIteration: 
1387         count 
= min(next_nonbmp_pos(s
), 1024) 
1389         ret 
= WriteConsoleW( 
1390             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1392             raise OSError('Failed to write string') 
1393         if not count
:  # We just wrote a non-BMP character 
1394             assert written
.value 
== 2 
1397             assert written
.value 
> 0 
1398             s 
= s
[written
.value
:] 
def write_string(s, out=None, encoding=None):
    """Write text *s* to *out* (default stderr), picking a safe encoding path."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
def bytes_to_intlist(bs):
    """Convert a bytes-like object to a list of byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        # Python 2: indexing bytes yields 1-char strings.
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a list of byte values back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
1439 # Cross-platform file locking 
1440 if sys
.platform 
== 'win32': 
1441     import ctypes
.wintypes
 
1444     class OVERLAPPED(ctypes
.Structure
): 
1446             ('Internal', ctypes
.wintypes
.LPVOID
), 
1447             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1448             ('Offset', ctypes
.wintypes
.DWORD
), 
1449             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1450             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1453     kernel32 
= ctypes
.windll
.kernel32
 
1454     LockFileEx 
= kernel32
.LockFileEx
 
1455     LockFileEx
.argtypes 
= [ 
1456         ctypes
.wintypes
.HANDLE
,     # hFile 
1457         ctypes
.wintypes
.DWORD
,      # dwFlags 
1458         ctypes
.wintypes
.DWORD
,      # dwReserved 
1459         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1460         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1461         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1463     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1464     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1465     UnlockFileEx
.argtypes 
= [ 
1466         ctypes
.wintypes
.HANDLE
,     # hFile 
1467         ctypes
.wintypes
.DWORD
,      # dwReserved 
1468         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1469         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1470         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1472     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1473     whole_low 
= 0xffffffff 
1474     whole_high 
= 0x7fffffff 
1476     def _lock_file(f
, exclusive
): 
1477         overlapped 
= OVERLAPPED() 
1478         overlapped
.Offset 
= 0 
1479         overlapped
.OffsetHigh 
= 0 
1480         overlapped
.hEvent 
= 0 
1481         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1482         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1483         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1484                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1485             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1487     def _unlock_file(f
): 
1488         assert f
._lock
_file
_overlapped
_p
 
1489         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1490         if not UnlockFileEx(handle
, 0, 
1491                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1492             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1495     # Some platforms, such as Jython, is missing fcntl 
1499         def _lock_file(f
, exclusive
): 
1500             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1502         def _unlock_file(f
): 
1503             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1505         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1507         def _lock_file(f
, exclusive
): 
1508             raise IOError(UNSUPPORTED_MSG
) 
1510         def _unlock_file(f
): 
1511             raise IOError(UNSUPPORTED_MSG
) 
class locked_file(object):
    """Context manager wrapping a file that is advisory-locked while open."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers share the lock; writers need it exclusively.
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Filesystem encoding, defaulting to UTF-8 when Python reports none."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
def shell_quote(args):
    """Join *args* into a single shell-safe command string."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(compat_shlex_quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL.
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Extract data previously stashed by smuggle_url; returns (url, data)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Render a byte count as a human-readable string ('1.50KiB')."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' from *s* using the multipliers in unit_table."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not match:
        return None
    # Accept ',' as a decimal separator too.
    number = match.group('num').replace(',', '.')
    multiplier = unit_table[match.group('unit')]
    return int(float(number) * multiplier)
1605 def parse_filesize(s
): 
1609     # The lower-case forms are of course incorrect and unofficial, 
1610     # but we support those too 
1627         'megabytes': 1000 ** 2, 
1628         'mebibytes': 1024 ** 2, 
1634         'gigabytes': 1000 ** 3, 
1635         'gibibytes': 1024 ** 3, 
1641         'terabytes': 1000 ** 4, 
1642         'tebibytes': 1024 ** 4, 
1648         'petabytes': 1000 ** 5, 
1649         'pebibytes': 1024 ** 5, 
1655         'exabytes': 1000 ** 6, 
1656         'exbibytes': 1024 ** 6, 
1662         'zettabytes': 1000 ** 7, 
1663         'zebibytes': 1024 ** 7, 
1669         'yottabytes': 1000 ** 8, 
1670         'yobibytes': 1024 ** 8, 
1673     return lookup_unit_table(_UNIT_TABLE
, s
) 
1682     if re
.match(r
'^[\d,.]+$', s
): 
1683         return str_to_int(s
) 
1694     return lookup_unit_table(_UNIT_TABLE
, s
) 
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4K'."""
    if s is None:
        return {}

    wh = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if wh:
        return {
            'width': int(wh.group('w')),
            'height': int(wh.group('h')),
        }

    p_notation = re.search(r'\b(\d+)[pPiI]\b', s)
    if p_notation:
        return {'height': int(p_notation.group(1))}

    k_notation = re.search(r'\b([48])[kK]\b', s)
    if k_notation:
        # 4K -> 2160, 8K -> 4320
        return {'height': int(k_notation.group(1)) * 540}

    return {}
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Unknown languages fall back to the English table.
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, leaving existing entities alone."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
def setproctitle(title):
    """Best-effort: set the process title via libc prctl (Linux only)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # PR_SET_NAME == 15
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip *start* from the beginning of *s*; None and non-matches pass through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip *end* from the end of *s*; None and non-matches pass through."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Drop one pair of matching surrounding quotes (single or double)."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def url_basename(url):
    """Last path component of a URL ('' when the path is empty)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]
1797     return re
.match(r
'https?://[^?#&]+/', url
).group() 
def urljoin(base, path):
    """Join *base* and *path* URLs; returns None when inputs are unusable."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Already absolute (possibly protocol-relative): nothing to join.
    if re.match(r'^(?:https?:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
class HEADRequest(compat_urllib_request.Request):
    """A Request that always issues the HTTP HEAD method."""
    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    """A Request that always issues the HTTP PUT method."""
    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Best-effort int conversion with optional attribute lookup and scaling."""
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    # Empty string is treated as missing, not as an error.
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Stringify *v*, substituting *default* for None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Tolerate thousands separators and leading '+'.
    int_str = re.sub(r'[,\.\+]', '', int_str)
    return int(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Best-effort float conversion with optional scaling."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Return *v* only if it is a genuine bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v):
    """Strip surrounding whitespace; None passes through."""
    if v is None:
        return None
    return v.strip()
def parse_duration(s):
    """Parse a duration string ('1:02:03', '2h30m', '5.5 mins', ISO 8601-ish)
    into a number of seconds, or None when unrecognized."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated clock style: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-suffixed style, loosely following ISO 8601 durations.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the file's extension; append it instead when the
    real extension does not match *expected_real_ext*."""
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the file's extension for *ext*; when the real extension does not
    match *expected_real_ext*, append instead of replacing."""
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        stem = name
    else:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
, args
=[]): 
1942     """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
1943     args can be a list of arguments for a short output (like -version) """ 
1945         subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate() 
1951 def get_exe_version(exe
, args
=['--version'], 
1952                     version_re
=None, unrecognized
='present'): 
1953     """ Returns the version of the specified executable, 
1954     or False if the executable is not present """ 
1956         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers 
1957         # SIGTTOU if youtube-dl is run in the background. 
1958         # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656 
1959         out
, _ 
= subprocess
.Popen( 
1960             [encodeArgument(exe
)] + args
, 
1961             stdin
=subprocess
.PIPE
, 
1962             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1965     if isinstance(out
, bytes):  # Python 2.x 
1966         out 
= out
.decode('ascii', 'ignore') 
1967     return detect_exe_version(out
, version_re
, unrecognized
) 
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Pull a version string out of an executable's --version output."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    # Tool exists but its output did not match: report the sentinel.
    return unrecognized
1981 class PagedList(object): 
1983         # This is only useful for tests 
1984         return len(self
.getslice()) 
1987 class OnDemandPagedList(PagedList
): 
1988     def __init__(self
, pagefunc
, pagesize
, use_cache
=True): 
1989         self
._pagefunc 
= pagefunc
 
1990         self
._pagesize 
= pagesize
 
1991         self
._use
_cache 
= use_cache
 
1995     def getslice(self
, start
=0, end
=None): 
1997         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
1998             firstid 
= pagenum 
* self
._pagesize
 
1999             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
2000             if start 
>= nextfirstid
: 
2005                 page_results 
= self
._cache
.get(pagenum
) 
2006             if page_results 
is None: 
2007                 page_results 
= list(self
._pagefunc
(pagenum
)) 
2009                 self
._cache
[pagenum
] = page_results
 
2012                 start 
% self
._pagesize
 
2013                 if firstid 
<= start 
< nextfirstid
 
2017                 ((end 
- 1) % self
._pagesize
) + 1 
2018                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
2021             if startv 
!= 0 or endv 
is not None: 
2022                 page_results 
= page_results
[startv
:endv
] 
2023             res
.extend(page_results
) 
2025             # A little optimization - if current page is not "full", ie. does 
2026             # not contain page_size videos then we can assume that this page 
2027             # is the last one - there are no more ids on further pages - 
2028             # i.e. no need to query again. 
2029             if len(page_results
) + startv 
< self
._pagesize
: 
2032             # If we got the whole page, but the next page is not interesting, 
2033             # break out early as well 
2034             if end 
== nextfirstid
: 
2039 class InAdvancePagedList(PagedList
): 
2040     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
2041         self
._pagefunc 
= pagefunc
 
2042         self
._pagecount 
= pagecount
 
2043         self
._pagesize 
= pagesize
 
2045     def getslice(self
, start
=0, end
=None): 
2047         start_page 
= start 
// self
._pagesize
 
2049             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
2050         skip_elems 
= start 
- start_page 
* self
._pagesize
 
2051         only_more 
= None if end 
is None else end 
- start
 
2052         for pagenum 
in range(start_page
, end_page
): 
2053             page 
= list(self
._pagefunc
(pagenum
)) 
2055                 page 
= page
[skip_elems
:] 
2057             if only_more 
is not None: 
2058                 if len(page
) < only_more
: 
2059                     only_more 
-= len(page
) 
2061                     page 
= page
[:only_more
] 
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 quote() chokes on unicode; feed it UTF-8 bytes.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # Non-ASCII hostnames are encoded via IDNA, not percent-escaping.
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
def read_batch_urls(batch_fd):
    """Read a batch file object and return its non-comment URLs as a list."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        # Lines starting with '#', ';' or ']' are comments.
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url_query(url, query):
    """Merge *query* parameters into the URL's existing query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req* with optionally replaced URL, data, headers and query,
    preserving the original HTTP method (HEAD/PUT/other)."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Encode *data* (a dict) as multipart/form-data with the given boundary.

    Raises ValueError when the boundary occurs inside the encoded payload.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for k, v in data.items():
        out += b'--' + boundary.encode('ascii') + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            # The boundary collided with the payload: retry with a fresh
            # random one, but propagate if the caller pinned the boundary.
            if has_specified_boundary:
                raise
            boundary = None

    return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple of keys.

    A key is skipped when it is missing, its value is None, or (unless
    skip_false_values is False) its value is falsy.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to *src* and return the first result that
    neither raises (AttributeError/KeyError/TypeError/IndexError) nor fails
    the optional isinstance check. Returns None when nothing matches."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding byte strings with *encoding*.

    NOTE: the default encoding is evaluated once at import time, matching the
    original behavior.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2241 TV_PARENTAL_GUIDELINES 
= { 
def parse_age_limit(s):
    """Normalize an age-limit value ('18', '18+', US/TV rating string, or an
    int in 0..21) to an integer age, or None when unrecognized."""
    if type(s) == int:
        # Deliberate exact type check (bool is an int subclass).
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    return TV_PARENTAL_GUIDELINES.get(s)
def strip_jsonp(code):
    """Strip a JSONP wrapper (optionally 'window.'-prefixed, optionally
    'fn && fn(...)' guarded) and return the bare callback payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
def js_to_json(code):
    """Convert a JavaScript object literal into valid JSON text: quote bare
    keys, normalize quoting, strip comments and trailing commas, and turn
    hex/octal integers into decimal."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Re-escape the string body for JSON.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # A numeric key keeps its trailing colon; values do not.
                return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown qualities sort below every known one.
            return -1
    return q
2325 DEFAULT_OUTTMPL 
= '%(title)s-%(id)s.%(ext)s' 
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints (raises
    ValueError on non-numeric components)."""
    return tuple(int(piece) for piece in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
    """True when *version* sorts strictly before *limit*; a missing or
    unparseable version is classified according to *assume_new*."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updatable when running from a zip bundle (module loaded by a
    # zipimporter) or from a frozen executable build.
    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(compat_shlex_quote(arg) for arg in args)
def error_to_compat_str(err):
    """Return the text of an exception as a native Unicode string."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
def mimetype2ext(mt):
    """Map a MIME type to a filename extension; unknown subtypes fall back to
    the (lowercased) subtype itself."""
    if mt is None:
        return None

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')
    # Drop MIME parameters (e.g. '; charset=...') and normalize case.
    res = res.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
def parse_codecs(codecs_str):
    """Split an RFC 6381 'codecs' string into {'vcodec': ..., 'acodec': ...},
    using 'none' for an absent stream."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        lambda c: c.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: fall back on positional interpretation.
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
        elif len(split_codecs) == 1:
            return {
                'vcodec': 'none',
                'acodec': split_codecs[0],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a response handle: prefer the filename in
    Content-Disposition, otherwise derive it from Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI carrying *data* base64-encoded."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone.
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        # No BOM: assume UTF-8.
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Pick the download protocol for an info dict: explicit 'protocol' wins,
    then URL prefix (rtmp/mms/rtsp), then extension (m3u8/f4m), then the URL
    scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Width of each column = longest cell in that column.
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    # Left-align every column but the last, padded one space wider.
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)
2523 def _match_one(filter_part
, dct
): 
2524     COMPARISON_OPERATORS 
= { 
2532     operator_rex 
= re
.compile(r
'''(?x)\s* 
2534         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* 
2536             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| 
2537             (?P<quote>["\'])(?P
<quotedstrval
>(?
:\\.|
(?
!(?P
=quote
)|
\\).)+?
)(?P
=quote
)|
 
2538             (?P
<strval
>(?
![0-9.])[a
-z0
-9A
-Z
]*) 
2541         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) 
2542     m = operator_rex.search(filter_part) 
2544         op = COMPARISON_OPERATORS[m.group('op')] 
2545         actual_value = dct.get(m.group('key')) 
2546         if (m.group('quotedstrval') is not None or 
2547             m.group('strval') is not None or 
2548             # If the original field is a string and matching comparisonvalue is 
2549             # a number we should respect the origin of the original field 
2550             # and process comparison value as a string (see 
2551             # https://github.com/rg3/youtube-dl/issues/11082). 
2552             actual_value is not None and m.group('intval') is not None and 
2553                 isinstance(actual_value, compat_str)): 
2554             if m.group('op') not in ('=', '!='): 
2556                     'Operator %s does not support string values!' % m.group('op')) 
2557             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') 
2558             quote = m.group('quote') 
2559             if quote is not None: 
2560                 comparison_value = comparison_value.replace(r'\%s' % quote, quote) 
2563                 comparison_value = int(m.group('intval')) 
2565                 comparison_value = parse_filesize(m.group('intval')) 
2566                 if comparison_value is None: 
2567                     comparison_value = parse_filesize(m.group('intval') + 'B') 
2568                 if comparison_value is None: 
2570                         'Invalid integer value %r in filter part %r' % ( 
2571                             m.group('intval'), filter_part)) 
2572         if actual_value is None: 
2573             return m.group('none_inclusive') 
2574         return op(actual_value, comparison_value) 
2577         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), 
2578         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None), 
2580     operator_rex = re.compile(r'''(?x
)\s
* 
2581         (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+) 
2583         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) 
2584     m = operator_rex.search(filter_part) 
2586         op = UNARY_OPERATORS[m.group('op')] 
2587         actual_value = dct.get(m.group('key')) 
2588         return op(actual_value) 
2590     raise ValueError('Invalid filter part %r' % filter_part) 
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
def match_filter_func(filter_str):
    """Build a match-filter callback: returns None when the video passes,
    otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.3', '12.3s' or 'HH:MM:SS[.f]'
    — a ':frames' suffix is read as a decimal fraction) into seconds, or
    None when empty/unrecognized."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a second count as an SRT timecode, HH:MM:SS,mmm."""
    return '%02d:%02d:%02d,%03d' % (
        seconds / 3600,
        (seconds % 3600) / 60,
        seconds % 60,
        (seconds % 1) * 1000)
2627 def dfxp2srt(dfxp_data): 
2629     @param dfxp_data A 
bytes-like 
object containing DFXP data
 
2630     @returns A 
unicode object containing converted SRT data
 
2632     LEGACY_NAMESPACES = ( 
2633         (b'http://www.w3.org/ns/ttml', [ 
2634             b'http://www.w3.org/2004/11/ttaf1', 
2635             b'http://www.w3.org/2006/04/ttaf1', 
2636             b'http://www.w3.org/2006/10/ttaf1', 
2638         (b'http://www.w3.org/ns/ttml#styling', [ 
2639             b'http://www.w3.org/ns/ttml#style', 
2643     SUPPORTED_STYLING = [ 
2652     _x = functools.partial(xpath_with_ns, ns_map={ 
2653         'ttml': 'http://www.w3.org/ns/ttml', 
2654         'tts': 'http://www.w3.org/ns/ttml#styling', 
2660     class TTMLPElementParser(object): 
2662         _unclosed_elements = [] 
2663         _applied_styles = [] 
2665         def start(self, tag, attrib): 
2666             if tag in (_x('ttml:br'), 'br'): 
2669                 unclosed_elements = [] 
2671                 element_style_id = attrib.get('style') 
2673                     style.update(default_style) 
2674                 if element_style_id: 
2675                     style.update(styles.get(element_style_id, {})) 
2676                 for prop in SUPPORTED_STYLING: 
2677                     prop_val = attrib.get(_x('tts:' + prop)) 
2679                         style[prop] = prop_val 
2682                     for k, v in sorted(style.items()): 
2683                         if self._applied_styles and self._applied_styles[-1].get(k) == v: 
2686                             font += ' color="%s"' % v 
2687                         elif k == 'fontSize': 
2688                             font += ' size="%s"' % v 
2689                         elif k == 'fontFamily': 
2690                             font += ' face="%s"' % v 
2691                         elif k == 'fontWeight' and v == 'bold': 
2693                             unclosed_elements.append('b') 
2694                         elif k == 'fontStyle' and v == 'italic': 
2696                             unclosed_elements.append('i') 
2697                         elif k == 'textDecoration' and v == 'underline': 
2699                             unclosed_elements.append('u') 
2701                         self._out += '<font' + font + '>' 
2702                         unclosed_elements.append('font') 
2704                     if self._applied_styles: 
2705                         applied_style.update(self._applied_styles[-1]) 
2706                     applied_style.update(style) 
2707                     self._applied_styles.append(applied_style) 
2708                 self._unclosed_elements.append(unclosed_elements) 
2711             if tag not in (_x('ttml:br'), 'br'): 
2712                 unclosed_elements = self._unclosed_elements.pop() 
2713                 for element in reversed(unclosed_elements): 
2714                     self._out += '</%s>' % element 
2715                 if unclosed_elements and self._applied_styles: 
2716                     self._applied_styles.pop() 
2718         def data(self, data): 
2722             return self._out.strip() 
2724     def parse_node(node): 
2725         target = TTMLPElementParser() 
2726         parser = xml.etree.ElementTree.XMLParser(target=target) 
2727         parser.feed(xml.etree.ElementTree.tostring(node)) 
2728         return parser.close() 
2730     for k, v in LEGACY_NAMESPACES: 
2732             dfxp_data = dfxp_data.replace(ns, k) 
2734     dfxp = compat_etree_fromstring(dfxp_data) 
2736     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') 
2739         raise ValueError('Invalid dfxp/TTML subtitle') 
2743         for style in dfxp.findall(_x('.//ttml:style')): 
2744             style_id = style.get('id') 
2745             parent_style_id = style.get('style') 
2747                 if parent_style_id not in styles: 
2750                 styles[style_id] = styles[parent_style_id].copy() 
2751             for prop in SUPPORTED_STYLING: 
2752                 prop_val = style.get(_x('tts:' + prop)) 
2754                     styles.setdefault(style_id, {})[prop] = prop_val 
2760     for p in ('body', 'div'): 
2761         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p]) 
2764         style = styles.get(ele.get('style')) 
2767         default_style.update(style) 
2769     for para, index in zip(paras, itertools.count(1)): 
2770         begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) 
2771         end_time = parse_dfxp_time_expr(para.attrib.get('end')) 
2772         dur = parse_dfxp_time_expr(para.attrib.get('dur')) 
2773         if begin_time is None: 
2778             end_time = begin_time + dur 
2779         out.append('%d\n%s --> %s\n%s\n\n' % ( 
2781             srt_subtitles_timecode(begin_time), 
2782             srt_subtitles_timecode(end_time), 
def cli_option(params, command_option, param):
    """Return [command_option, value] when params[param] is set, else []."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean params[param] as a CLI flag, either as two argv
    entries or as one 'option<separator>value' entry."""
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare CLI flag when params[param] equals expected_value."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=[]):
    """Return the extra-args list stored in params[param], or *default* when
    absent (the default list is never mutated here)."""
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
2818 class ISO639Utils(object): 
2819     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
3008     def short2long(cls, code): 
3009         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
3010         return cls._lang_map.get(code[:2]) 
3013     def long2short(cls, code): 
3014         """Convert language code from ISO 639-2/T to ISO 639-1""" 
3015         for short_name, long_name in cls._lang_map.items(): 
3016             if long_name == code: 
3020 class ISO3166Utils(object): 
3021     # From http://data.okfn.org/data/core/country-list 
3023         'AF': 'Afghanistan', 
3024         'AX': 'Åland Islands', 
3027         'AS': 'American Samoa', 
3032         'AG': 'Antigua and Barbuda', 
3049         'BO': 'Bolivia, Plurinational State of', 
3050         'BQ': 'Bonaire, Sint Eustatius and Saba', 
3051         'BA': 'Bosnia and Herzegovina', 
3053         'BV': 'Bouvet Island', 
3055         'IO': 'British Indian Ocean Territory', 
3056         'BN': 'Brunei Darussalam', 
3058         'BF': 'Burkina Faso', 
3064         'KY': 'Cayman Islands', 
3065         'CF': 'Central African Republic', 
3069         'CX': 'Christmas Island', 
3070         'CC': 'Cocos (Keeling) Islands', 
3074         'CD': 'Congo, the Democratic Republic of the', 
3075         'CK': 'Cook Islands', 
3077         'CI': 'Côte d\'Ivoire', 
3082         'CZ': 'Czech Republic', 
3086         'DO': 'Dominican Republic', 
3089         'SV': 'El Salvador', 
3090         'GQ': 'Equatorial Guinea', 
3094         'FK': 'Falkland Islands (Malvinas)', 
3095         'FO': 'Faroe Islands', 
3099         'GF': 'French Guiana', 
3100         'PF': 'French Polynesia', 
3101         'TF': 'French Southern Territories', 
3116         'GW': 'Guinea-Bissau', 
3119         'HM': 'Heard Island and McDonald Islands', 
3120         'VA': 'Holy See (Vatican City State)', 
3127         'IR': 'Iran, Islamic Republic of', 
3130         'IM': 'Isle of Man', 
3140         'KP': 'Korea, Democratic People\'s Republic of', 
3141         'KR': 'Korea, Republic of', 
3144         'LA': 'Lao People\'s Democratic Republic', 
3150         'LI': 'Liechtenstein', 
3154         'MK': 'Macedonia, the Former Yugoslav Republic of', 
3161         'MH': 'Marshall Islands', 
3167         'FM': 'Micronesia, Federated States of', 
3168         'MD': 'Moldova, Republic of', 
3179         'NL': 'Netherlands', 
3180         'NC': 'New Caledonia', 
3181         'NZ': 'New Zealand', 
3186         'NF': 'Norfolk Island', 
3187         'MP': 'Northern Mariana Islands', 
3192         'PS': 'Palestine, State of', 
3194         'PG': 'Papua New Guinea', 
3197         'PH': 'Philippines', 
3201         'PR': 'Puerto Rico', 
3205         'RU': 'Russian Federation', 
3207         'BL': 'Saint Barthélemy', 
3208         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
3209         'KN': 'Saint Kitts and Nevis', 
3210         'LC': 'Saint Lucia', 
3211         'MF': 'Saint Martin (French part)', 
3212         'PM': 'Saint Pierre and Miquelon', 
3213         'VC': 'Saint Vincent and the Grenadines', 
3216         'ST': 'Sao Tome and Principe', 
3217         'SA': 'Saudi Arabia', 
3221         'SL': 'Sierra Leone', 
3223         'SX': 'Sint Maarten (Dutch part)', 
3226         'SB': 'Solomon Islands', 
3228         'ZA': 'South Africa', 
3229         'GS': 'South Georgia and the South Sandwich Islands', 
3230         'SS': 'South Sudan', 
3235         'SJ': 'Svalbard and Jan Mayen', 
3238         'CH': 'Switzerland', 
3239         'SY': 'Syrian Arab Republic', 
3240         'TW': 'Taiwan, Province of China', 
3242         'TZ': 'Tanzania, United Republic of', 
3244         'TL': 'Timor-Leste', 
3248         'TT': 'Trinidad and Tobago', 
3251         'TM': 'Turkmenistan', 
3252         'TC': 'Turks and Caicos Islands', 
3256         'AE': 'United Arab Emirates', 
3257         'GB': 'United Kingdom', 
3258         'US': 'United States', 
3259         'UM': 'United States Minor Outlying Islands', 
3263         'VE': 'Venezuela, Bolivarian Republic of', 
3265         'VG': 'Virgin Islands, British', 
3266         'VI': 'Virgin Islands, U.S.', 
3267         'WF': 'Wallis and Futuna', 
3268         'EH': 'Western Sahara', 
3275     def short2full(cls, code): 
3276         """Convert an ISO 3166-2 country code to the corresponding full name""" 
3277         return cls._country_map.get(code.upper()) 
3280 class GeoUtils(object): 
3281     # Major IPv4 address blocks per country 
3283         'AD': '85.94.160.0/19', 
3284         'AE': '94.200.0.0/13', 
3285         'AF': '149.54.0.0/17', 
3286         'AG': '209.59.64.0/18', 
3287         'AI': '204.14.248.0/21', 
3288         'AL': '46.99.0.0/16', 
3289         'AM': '46.70.0.0/15', 
3290         'AO': '105.168.0.0/13', 
3291         'AP': '159.117.192.0/21', 
3292         'AR': '181.0.0.0/12', 
3293         'AS': '202.70.112.0/20', 
3294         'AT': '84.112.0.0/13', 
3295         'AU': '1.128.0.0/11', 
3296         'AW': '181.41.0.0/18', 
3297         'AZ': '5.191.0.0/16', 
3298         'BA': '31.176.128.0/17', 
3299         'BB': '65.48.128.0/17', 
3300         'BD': '114.130.0.0/16', 
3302         'BF': '129.45.128.0/17', 
3303         'BG': '95.42.0.0/15', 
3304         'BH': '37.131.0.0/17', 
3305         'BI': '154.117.192.0/18', 
3306         'BJ': '137.255.0.0/16', 
3307         'BL': '192.131.134.0/24', 
3308         'BM': '196.12.64.0/18', 
3309         'BN': '156.31.0.0/16', 
3310         'BO': '161.56.0.0/16', 
3311         'BQ': '161.0.80.0/20', 
3312         'BR': '152.240.0.0/12', 
3313         'BS': '24.51.64.0/18', 
3314         'BT': '119.2.96.0/19', 
3315         'BW': '168.167.0.0/16', 
3316         'BY': '178.120.0.0/13', 
3317         'BZ': '179.42.192.0/18', 
3318         'CA': '99.224.0.0/11', 
3319         'CD': '41.243.0.0/16', 
3320         'CF': '196.32.200.0/21', 
3321         'CG': '197.214.128.0/17', 
3322         'CH': '85.0.0.0/13', 
3323         'CI': '154.232.0.0/14', 
3324         'CK': '202.65.32.0/19', 
3325         'CL': '152.172.0.0/14', 
3326         'CM': '165.210.0.0/15', 
3327         'CN': '36.128.0.0/10', 
3328         'CO': '181.240.0.0/12', 
3329         'CR': '201.192.0.0/12', 
3330         'CU': '152.206.0.0/15', 
3331         'CV': '165.90.96.0/19', 
3332         'CW': '190.88.128.0/17', 
3333         'CY': '46.198.0.0/15', 
3334         'CZ': '88.100.0.0/14', 
3336         'DJ': '197.241.0.0/17', 
3337         'DK': '87.48.0.0/12', 
3338         'DM': '192.243.48.0/20', 
3339         'DO': '152.166.0.0/15', 
3340         'DZ': '41.96.0.0/12', 
3341         'EC': '186.68.0.0/15', 
3342         'EE': '90.190.0.0/15', 
3343         'EG': '156.160.0.0/11', 
3344         'ER': '196.200.96.0/20', 
3345         'ES': '88.0.0.0/11', 
3346         'ET': '196.188.0.0/14', 
3347         'EU': '2.16.0.0/13', 
3348         'FI': '91.152.0.0/13', 
3349         'FJ': '144.120.0.0/16', 
3350         'FM': '119.252.112.0/20', 
3351         'FO': '88.85.32.0/19', 
3353         'GA': '41.158.0.0/15', 
3355         'GD': '74.122.88.0/21', 
3356         'GE': '31.146.0.0/16', 
3357         'GF': '161.22.64.0/18', 
3358         'GG': '62.68.160.0/19', 
3359         'GH': '45.208.0.0/14', 
3360         'GI': '85.115.128.0/19', 
3361         'GL': '88.83.0.0/19', 
3362         'GM': '160.182.0.0/15', 
3363         'GN': '197.149.192.0/18', 
3364         'GP': '104.250.0.0/19', 
3365         'GQ': '105.235.224.0/20', 
3366         'GR': '94.64.0.0/13', 
3367         'GT': '168.234.0.0/16', 
3368         'GU': '168.123.0.0/16', 
3369         'GW': '197.214.80.0/20', 
3370         'GY': '181.41.64.0/18', 
3371         'HK': '113.252.0.0/14', 
3372         'HN': '181.210.0.0/16', 
3373         'HR': '93.136.0.0/13', 
3374         'HT': '148.102.128.0/17', 
3375         'HU': '84.0.0.0/14', 
3376         'ID': '39.192.0.0/10', 
3377         'IE': '87.32.0.0/12', 
3378         'IL': '79.176.0.0/13', 
3379         'IM': '5.62.80.0/20', 
3380         'IN': '117.192.0.0/10', 
3381         'IO': '203.83.48.0/21', 
3382         'IQ': '37.236.0.0/14', 
3383         'IR': '2.176.0.0/12', 
3384         'IS': '82.221.0.0/16', 
3385         'IT': '79.0.0.0/10', 
3386         'JE': '87.244.64.0/18', 
3387         'JM': '72.27.0.0/17', 
3388         'JO': '176.29.0.0/16', 
3389         'JP': '126.0.0.0/8', 
3390         'KE': '105.48.0.0/12', 
3391         'KG': '158.181.128.0/17', 
3392         'KH': '36.37.128.0/17', 
3393         'KI': '103.25.140.0/22', 
3394         'KM': '197.255.224.0/20', 
3395         'KN': '198.32.32.0/19', 
3396         'KP': '175.45.176.0/22', 
3397         'KR': '175.192.0.0/10', 
3398         'KW': '37.36.0.0/14', 
3399         'KY': '64.96.0.0/15', 
3400         'KZ': '2.72.0.0/13', 
3401         'LA': '115.84.64.0/18', 
3402         'LB': '178.135.0.0/16', 
3403         'LC': '192.147.231.0/24', 
3404         'LI': '82.117.0.0/19', 
3405         'LK': '112.134.0.0/15', 
3406         'LR': '41.86.0.0/19', 
3407         'LS': '129.232.0.0/17', 
3408         'LT': '78.56.0.0/13', 
3409         'LU': '188.42.0.0/16', 
3410         'LV': '46.109.0.0/16', 
3411         'LY': '41.252.0.0/14', 
3412         'MA': '105.128.0.0/11', 
3413         'MC': '88.209.64.0/18', 
3414         'MD': '37.246.0.0/16', 
3415         'ME': '178.175.0.0/17', 
3416         'MF': '74.112.232.0/21', 
3417         'MG': '154.126.0.0/17', 
3418         'MH': '117.103.88.0/21', 
3419         'MK': '77.28.0.0/15', 
3420         'ML': '154.118.128.0/18', 
3421         'MM': '37.111.0.0/17', 
3422         'MN': '49.0.128.0/17', 
3423         'MO': '60.246.0.0/16', 
3424         'MP': '202.88.64.0/20', 
3425         'MQ': '109.203.224.0/19', 
3426         'MR': '41.188.64.0/18', 
3427         'MS': '208.90.112.0/22', 
3428         'MT': '46.11.0.0/16', 
3429         'MU': '105.16.0.0/12', 
3430         'MV': '27.114.128.0/18', 
3431         'MW': '105.234.0.0/16', 
3432         'MX': '187.192.0.0/11', 
3433         'MY': '175.136.0.0/13', 
3434         'MZ': '197.218.0.0/15', 
3435         'NA': '41.182.0.0/16', 
3436         'NC': '101.101.0.0/18', 
3437         'NE': '197.214.0.0/18', 
3438         'NF': '203.17.240.0/22', 
3439         'NG': '105.112.0.0/12', 
3440         'NI': '186.76.0.0/15', 
3441         'NL': '145.96.0.0/11', 
3442         'NO': '84.208.0.0/13', 
3443         'NP': '36.252.0.0/15', 
3444         'NR': '203.98.224.0/19', 
3445         'NU': '49.156.48.0/22', 
3446         'NZ': '49.224.0.0/14', 
3447         'OM': '5.36.0.0/15', 
3448         'PA': '186.72.0.0/15', 
3449         'PE': '186.160.0.0/14', 
3450         'PF': '123.50.64.0/18', 
3451         'PG': '124.240.192.0/19', 
3452         'PH': '49.144.0.0/13', 
3453         'PK': '39.32.0.0/11', 
3454         'PL': '83.0.0.0/11', 
3455         'PM': '70.36.0.0/20', 
3456         'PR': '66.50.0.0/16', 
3457         'PS': '188.161.0.0/16', 
3458         'PT': '85.240.0.0/13', 
3459         'PW': '202.124.224.0/20', 
3460         'PY': '181.120.0.0/14', 
3461         'QA': '37.210.0.0/15', 
3462         'RE': '139.26.0.0/16', 
3463         'RO': '79.112.0.0/13', 
3464         'RS': '178.220.0.0/14', 
3465         'RU': '5.136.0.0/13', 
3466         'RW': '105.178.0.0/15', 
3467         'SA': '188.48.0.0/13', 
3468         'SB': '202.1.160.0/19', 
3469         'SC': '154.192.0.0/11', 
3470         'SD': '154.96.0.0/13', 
3471         'SE': '78.64.0.0/12', 
3472         'SG': '152.56.0.0/14', 
3473         'SI': '188.196.0.0/14', 
3474         'SK': '78.98.0.0/15', 
3475         'SL': '197.215.0.0/17', 
3476         'SM': '89.186.32.0/19', 
3477         'SN': '41.82.0.0/15', 
3478         'SO': '197.220.64.0/19', 
3479         'SR': '186.179.128.0/17', 
3480         'SS': '105.235.208.0/21', 
3481         'ST': '197.159.160.0/19', 
3482         'SV': '168.243.0.0/16', 
3483         'SX': '190.102.0.0/20', 
3485         'SZ': '41.84.224.0/19', 
3486         'TC': '65.255.48.0/20', 
3487         'TD': '154.68.128.0/19', 
3488         'TG': '196.168.0.0/14', 
3489         'TH': '171.96.0.0/13', 
3490         'TJ': '85.9.128.0/18', 
3491         'TK': '27.96.24.0/21', 
3492         'TL': '180.189.160.0/20', 
3493         'TM': '95.85.96.0/19', 
3494         'TN': '197.0.0.0/11', 
3495         'TO': '175.176.144.0/21', 
3496         'TR': '78.160.0.0/11', 
3497         'TT': '186.44.0.0/15', 
3498         'TV': '202.2.96.0/19', 
3499         'TW': '120.96.0.0/11', 
3500         'TZ': '156.156.0.0/14', 
3501         'UA': '93.72.0.0/13', 
3502         'UG': '154.224.0.0/13', 
3504         'UY': '167.56.0.0/13', 
3505         'UZ': '82.215.64.0/18', 
3506         'VA': '212.77.0.0/19', 
3507         'VC': '24.92.144.0/20', 
3508         'VE': '186.88.0.0/13', 
3509         'VG': '172.103.64.0/18', 
3510         'VI': '146.226.0.0/16', 
3511         'VN': '14.160.0.0/11', 
3512         'VU': '202.80.32.0/20', 
3513         'WF': '117.20.32.0/21', 
3514         'WS': '202.4.32.0/19', 
3515         'YE': '134.35.0.0/16', 
3516         'YT': '41.242.116.0/22', 
3517         'ZA': '41.0.0.0/11', 
3518         'ZM': '165.56.0.0/13', 
3519         'ZW': '41.85.192.0/19', 
3523     def random_ipv4(cls, code): 
3524         block = cls._country_ip_map.get(code.upper()) 
3527         addr, preflen = block.split('/') 
3528         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] 
3529         addr_max = addr_min | (0xffffffff >> int(preflen)) 
3530         return compat_str(socket.inet_ntoa( 
3531             compat_struct_pack('!L', random.randint(addr_min, addr_max)))) 
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header,
    supports a '__noproxy__' sentinel, and tags SOCKS proxies via the
    'Ytdl-socks-proxy' header instead of opening them itself."""

    def __init__(self, proxies=None):
        # Set default handlers: every http/https request funnels into
        # proxy_open with '__noproxy__' unless a proxy map says otherwise.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
3559 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 
3560 # released into Public Domain 
3561 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes.  this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    if length % 4:
        # Left-pad to a multiple of 4 so we can consume 32-bit words.
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted little-endian, hence the byte reversal.
    payload = int(binascii.hexlify(data[::-1]), 16)
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data

    Fix: PKCS#1 v1.5 (RFC 2313 section 8.1) requires the padding string PS to
    consist of NONZERO octets; the previous randint(0, 254) could emit 0,
    which acts as a premature data separator and corrupts decryption.
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Block layout: 0x00 0x02 <nonzero random PS> 0x00 <data>
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Render non-negative integer num in base n using table as the digits.

    When table is omitted, digits come from 0-9a-zA-Z (so n <= 62).
    Raises ValueError when n exceeds the length of the digit table.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse once at the end.
    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
 
def decode_packed_codes(code):
    """Decode 'packed' obfuscated JavaScript (matched by PACKED_CODES_RE).

    Rebuilds the symbol table (base-n name -> original word) and substitutes
    every word of the obfuscated source back through it.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in range(count - 1, -1, -1):
        key = encode_base_n(index, base)
        # An empty replacement means the word stands for its own base-n name.
        symbol_table[key] = symbols[index] or key

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list string into a dict of key -> value."""
    info = {}
    attr_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    for key, val in re.findall(attr_re, attrib):
        # Quoted values keep everything between the quotes, commas included;
        # only the surrounding quotes are stripped.
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, like JavaScript's >>>."""
    if val >= 0:
        return val >> n
    # Map the negative value onto its unsigned 32-bit representation first.
    return (val + 0x100000000) >> n
 
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    """Decode PNG bytes into (width, height, pixels).

    pixels is a list of rows; each row is a flat list of byte values
    (3 per pixel — assumes 8-bit RGB, no interlacing; TODO confirm against
    the callers producing the PNG input).
    """
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}

    def unpack_integer(raw):
        # Big-endian unsigned integer of 1, 2 or 4 bytes.
        return compat_struct_unpack(int_map[len(raw)], raw)[0]

    # Split the stream into chunks; each is
    # length (4) + type (4) + payload (length) + CRC (4).
    chunks = []
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data,
        })

    # IHDR is always the first chunk; width and height lead its payload.
    ihdr = chunks[0]['data']
    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across several IDAT chunks; inflate them as one.
    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # 3 bytes per pixel
    pixels = []

    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed with one filter-type byte.
        base_pos = y * (1 + stride)
        filter_type = decompressed_data[base_pos]

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + base_pos + x]
            basex = y * stride + x
            left = 0
            up = 0

            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec section 9).
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = _get_pixel(basex - stride - 3) if (x > 2 and y > 0) else 0

                p = a + b - c
                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
 
def write_xattr(path, key, value):
    """Set extended attribute `key` to bytes `value` on the file at `path`.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate Data
    Streams on Windows, then the setfattr/xattr command-line tools. Raises
    XAttrUnavailableError when no mechanism exists, XAttrMetadataError when
    the chosen mechanism fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr module
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # No Python module available: fall back to CLI tools.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
3882 def random_birthday(year_field
, month_field
, day_field
): 
3884         year_field
: str(random
.randint(1950, 1995)), 
3885         month_field
: str(random
.randint(1, 12)), 
3886         day_field
: str(random
.randint(1, 31)),