4 from __future__ 
import unicode_literals
 
  34 import xml
.etree
.ElementTree
 
  38     compat_HTMLParseError
, 
  42     compat_etree_fromstring
, 
  45     compat_html_entities_html5
, 
  51     compat_socket_create_connection
, 
  57     compat_urllib_parse_urlencode
, 
  58     compat_urllib_parse_urlparse
, 
  59     compat_urllib_parse_unquote_plus
, 
  60     compat_urllib_request
, 
  71 def register_socks_protocols(): 
  72     # "Register" SOCKS protocols 
  73     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  74     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  75     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  76         if scheme 
not in compat_urlparse
.uses_netloc
: 
  77             compat_urlparse
.uses_netloc
.append(scheme
) 
  80 # This is not clearly defined otherwise 
  81 compiled_regex_type 
= type(re
.compile('')) 
  84     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 
  85     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  86     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  87     'Accept-Encoding': 'gzip, deflate', 
  88     'Accept-Language': 'en-us,en;q=0.5', 
  93     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
  99 ENGLISH_MONTH_NAMES 
= [ 
 100     'January', 'February', 'March', 'April', 'May', 'June', 
 101     'July', 'August', 'September', 'October', 'November', 'December'] 
 104     'en': ENGLISH_MONTH_NAMES
, 
 106         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
 107         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 111     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 112     'flv', 'f4v', 'f4a', 'f4b', 
 113     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 114     'mkv', 'mka', 'mk3d', 
 123     'f4f', 'f4m', 'm3u8', 'smil') 
 125 # needed for sanitizing filenames in restricted mode 
 126 ACCENT_CHARS 
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', 
 127                         itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], 
 128                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) 
 151     '%Y-%m-%d %H:%M:%S.%f', 
 154     '%Y-%m-%dT%H:%M:%SZ', 
 155     '%Y-%m-%dT%H:%M:%S.%fZ', 
 156     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 158     '%Y-%m-%dT%H:%M:%S.%f', 
 161     '%b %d %Y at %H:%M:%S', 
 164 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 165 DATE_FORMATS_DAY_FIRST
.extend([ 
 174 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 175 DATE_FORMATS_MONTH_FIRST
.extend([ 
 183 PACKED_CODES_RE 
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" 
 186 def preferredencoding(): 
 187     """Get preferred encoding. 
 189     Returns the best encoding scheme for the system, based on 
 190     locale.getpreferredencoding() and some further tweaks. 
 193         pref 
= locale
.getpreferredencoding() 
 201 def write_json_file(obj
, fn
): 
 202     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 204     fn 
= encodeFilename(fn
) 
 205     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 206         encoding 
= get_filesystem_encoding() 
 207         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 208         # will fail if the filename contains non ascii characters unless we 
 209         # use a unicode object 
 210         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 211         # the same for os.path.dirname 
 212         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 214         path_basename 
= os
.path
.basename
 
 215         path_dirname 
= os
.path
.dirname
 
 219         'prefix': path_basename(fn
) + '.', 
 220         'dir': path_dirname(fn
), 
 224     # In Python 2.x, json.dump expects a bytestream. 
 225     # In Python 3.x, it writes to a character stream 
 226     if sys
.version_info 
< (3, 0): 
 234     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 239         if sys
.platform 
== 'win32': 
 240             # Need to remove existing file on Windows, else os.rename raises 
 241             # WindowsError or FileExistsError. 
 246         os
.rename(tf
.name
, fn
) 
 255 if sys
.version_info 
>= (2, 7): 
 256     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 257         """ Find the xpath xpath[@key=val] """ 
 258         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 259         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 260         return node
.find(expr
) 
 262     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 263         for f 
in node
.findall(compat_xpath(xpath
)): 
 264             if key 
not in f
.attrib
: 
 266             if val 
is None or f
.attrib
.get(key
) == val
: 
 270 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 271 # the namespace parameter 
 274 def xpath_with_ns(path
, ns_map
): 
 275     components 
= [c
.split(':') for c 
in path
.split('/')] 
 279             replaced
.append(c
[0]) 
 282             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 283     return '/'.join(replaced
) 
 286 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 287     def _find_xpath(xpath
): 
 288         return node
.find(compat_xpath(xpath
)) 
 290     if isinstance(xpath
, (str, compat_str
)): 
 291         n 
= _find_xpath(xpath
) 
 299         if default 
is not NO_DEFAULT
: 
 302             name 
= xpath 
if name 
is None else name
 
 303             raise ExtractorError('Could not find XML element %s' % name
) 
 309 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 310     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 311     if n 
is None or n 
== default
: 
 314         if default 
is not NO_DEFAULT
: 
 317             name 
= xpath 
if name 
is None else name
 
 318             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 324 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 325     n 
= find_xpath_attr(node
, xpath
, key
) 
 327         if default 
is not NO_DEFAULT
: 
 330             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 331             raise ExtractorError('Could not find XML attribute %s' % name
) 
 337 def get_element_by_id(id, html
): 
 338     """Return the content of the tag with the specified ID in the passed HTML document""" 
 339     return get_element_by_attribute('id', id, html
) 
 342 def get_element_by_class(class_name
, html
): 
 343     """Return the content of the first tag with the specified class in the passed HTML document""" 
 344     retval 
= get_elements_by_class(class_name
, html
) 
 345     return retval
[0] if retval 
else None 
 348 def get_element_by_attribute(attribute
, value
, html
, escape_value
=True): 
 349     retval 
= get_elements_by_attribute(attribute
, value
, html
, escape_value
) 
 350     return retval
[0] if retval 
else None 
 353 def get_elements_by_class(class_name
, html
): 
 354     """Return the content of all tags with the specified class in the passed HTML document as a list""" 
 355     return get_elements_by_attribute( 
 356         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 357         html, escape_value=False) 
 360 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
 361     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 363     value = re.escape(value) if escape_value else value 
 366     for m in re.finditer(r'''(?xs) 
 368          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
 
 370          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*? 
 374     ''' % (re.escape(attribute), value), html): 
 375         res = m.group('content
') 
 377         if res.startswith('"') or res.startswith("'"): 
 380         retlist.append(unescapeHTML(res)) 
 385 class HTMLAttributeParser(compat_HTMLParser): 
 386     """Trivial HTML parser to gather the attributes for a single element""" 
 389         compat_HTMLParser.__init__(self) 
 391     def handle_starttag(self, tag, attrs): 
 392         self.attrs = dict(attrs) 
 395 def extract_attributes(html_element): 
 396     """Given a string for an HTML element such as 
 398          a="foo" B="bar" c="&98;az" d=boz 
 399          empty= noval entity="&" 
 402     Decode and return a dictionary of attributes. 
 404         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 405         'empty
': '', 'noval
': None, 'entity
': '&', 
 406         'sq
': '"', 'dq': '\'' 
 408     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 409     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 411     parser = HTMLAttributeParser() 
 413         parser.feed(html_element) 
 415     # Older Python may throw HTMLParseError in case of malformed HTML 
 416     except compat_HTMLParseError: 
 421 def clean_html(html): 
 422     """Clean an HTML snippet into a readable string""" 
 424     if html is None:  # Convenience for sanitizing descriptions etc. 
 428     html = html.replace('\n', ' ') 
 429     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 430     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 432     html = re.sub('<.*?>', '', html) 
 433     # Replace html entities 
 434     html = unescapeHTML(html) 
 438 def sanitize_open(filename, open_mode): 
 439     """Try to open the given filename, and slightly tweak it if this fails. 
 441     Attempts to open the given filename. If this fails, it tries to change 
 442     the filename slightly, step by step, until it's either able to open it 
 443     or it fails and raises a final exception, like the standard open() 
 446     It returns the tuple (stream, definitive_file_name). 
 450             if sys.platform == 'win32': 
 452                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 453             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 454         stream = open(encodeFilename(filename), open_mode) 
 455         return (stream, filename) 
 456     except (IOError, OSError) as err: 
 457         if err.errno in (errno.EACCES,): 
 460         # In case of error, try to remove win32 forbidden chars 
 461         alt_filename = sanitize_path(filename) 
 462         if alt_filename == filename: 
 465             # An exception here should be caught in the caller 
 466             stream = open(encodeFilename(alt_filename), open_mode) 
 467             return (stream, alt_filename) 
 470 def timeconvert(timestr): 
 471     """Convert RFC 2822 defined time string into system timestamp""" 
 473     timetuple = email.utils.parsedate_tz(timestr) 
 474     if timetuple is not None: 
 475         timestamp = email.utils.mktime_tz(timetuple) 
 479 def sanitize_filename(s, restricted=False, is_id=False): 
 480     """Sanitizes a string so it could be used as part of a filename. 
 481     If restricted is set, use a stricter subset of allowed characters. 
 482     Set is_id if this is not an arbitrary string, but an ID that should be kept 
 485     def replace_insane(char): 
 486         if restricted and char in ACCENT_CHARS: 
 487             return ACCENT_CHARS[char] 
 488         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 491             return '' if restricted else '\'' 
 493             return '_
-' if restricted else ' -' 
 494         elif char in '\\/|
*<>': 
 496         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 498         if restricted 
and ord(char
) > 127: 
 503     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 504     result 
= ''.join(map(replace_insane
, s
)) 
 506         while '__' in result
: 
 507             result 
= result
.replace('__', '_') 
 508         result 
= result
.strip('_') 
 509         # Common case of "Foreign band name - English song title" 
 510         if restricted 
and result
.startswith('-_'): 
 512         if result
.startswith('-'): 
 513             result 
= '_' + result
[len('-'):] 
 514         result 
= result
.lstrip('.') 
 520 def sanitize_path(s
): 
 521     """Sanitizes and normalizes path on Windows""" 
 522     if sys
.platform 
!= 'win32': 
 524     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 525     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 526         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 527     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 531         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
 532         for path_part 
in norm_path
] 
 534         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 535     return os
.path
.join(*sanitized_path
) 
 538 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of 
 539 # unwanted failures due to missing protocol 
 540 def sanitize_url(url
): 
 541     return 'http:%s' % url 
if url
.startswith('//') else url
 
 544 def sanitized_Request(url
, *args
, **kwargs
): 
 545     return compat_urllib_request
.Request(sanitize_url(url
), *args
, **kwargs
) 
 549     """Expand shell variables and ~""" 
 550     return os
.path
.expandvars(compat_expanduser(s
)) 
 553 def orderedSet(iterable
): 
 554     """ Remove all duplicates from the input iterable """ 
 562 def _htmlentity_transform(entity_with_semicolon
): 
 563     """Transforms an HTML entity to a character.""" 
 564     entity 
= entity_with_semicolon
[:-1] 
 566     # Known non-numeric HTML entity 
 567     if entity 
in compat_html_entities
.name2codepoint
: 
 568         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 570     # TODO: HTML5 allows entities without a semicolon. For example, 
 571     # 'Éric' should be decoded as 'Éric'. 
 572     if entity_with_semicolon 
in compat_html_entities_html5
: 
 573         return compat_html_entities_html5
[entity_with_semicolon
] 
 575     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 577         numstr 
= mobj
.group(1) 
 578         if numstr
.startswith('x'): 
 580             numstr 
= '0%s' % numstr
 
 583         # See https://github.com/rg3/youtube-dl/issues/7518 
 585             return compat_chr(int(numstr
, base
)) 
 589     # Unknown entity in name, return its literal representation 
 590     return '&%s;' % entity
 
 596     assert type(s
) == compat_str
 
 599         r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 602 def get_subprocess_encoding(): 
 603     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 604         # For subprocess calls, encode with locale encoding 
 605         # Refer to http://stackoverflow.com/a/9951851/35070 
 606         encoding 
= preferredencoding() 
 608         encoding 
= sys
.getfilesystemencoding() 
 614 def encodeFilename(s
, for_subprocess
=False): 
 616     @param s The name of the file 
 619     assert type(s
) == compat_str
 
 621     # Python 3 has a Unicode API 
 622     if sys
.version_info 
>= (3, 0): 
 625     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 626     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 627     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 628     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 631     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 632     if sys
.platform
.startswith('java'): 
 635     return s
.encode(get_subprocess_encoding(), 'ignore') 
 638 def decodeFilename(b
, for_subprocess
=False): 
 640     if sys
.version_info 
>= (3, 0): 
 643     if not isinstance(b
, bytes): 
 646     return b
.decode(get_subprocess_encoding(), 'ignore') 
 649 def encodeArgument(s
): 
 650     if not isinstance(s
, compat_str
): 
 651         # Legacy code that uses byte strings 
 652         # Uncomment the following line after fixing all post processors 
 653         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 654         s 
= s
.decode('ascii') 
 655     return encodeFilename(s
, True) 
 658 def decodeArgument(b
): 
 659     return decodeFilename(b
, True) 
 662 def decodeOption(optval
): 
 665     if isinstance(optval
, bytes): 
 666         optval 
= optval
.decode(preferredencoding()) 
 668     assert isinstance(optval
, compat_str
) 
 672 def formatSeconds(secs
): 
 674         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 676         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 681 def make_HTTPS_handler(params
, **kwargs
): 
 682     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 683     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 684         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 685         if opts_no_check_certificate
: 
 686             context
.check_hostname 
= False 
 687             context
.verify_mode 
= ssl
.CERT_NONE
 
 689             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 692             # (create_default_context present but HTTPSHandler has no context=) 
 695     if sys
.version_info 
< (3, 2): 
 696         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 698         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 699         context
.verify_mode 
= (ssl
.CERT_NONE
 
 700                                if opts_no_check_certificate
 
 701                                else ssl
.CERT_REQUIRED
) 
 702         context
.set_default_verify_paths() 
 703         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 706 def bug_reports_message(): 
 707     if ytdl_is_updateable(): 
 708         update_cmd 
= 'type  youtube-dl -U  to update' 
 710         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 711     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 712     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 713     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
 717 class YoutubeDLError(Exception): 
 718     """Base exception for YoutubeDL errors.""" 
 722 class ExtractorError(YoutubeDLError
): 
 723     """Error during info extraction.""" 
 725     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 726         """ tb, if given, is the original traceback (so that it can be printed out). 
 727         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 730         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 732         if video_id 
is not None: 
 733             msg 
= video_id 
+ ': ' + msg
 
 735             msg 
+= ' (caused by %r)' % cause
 
 737             msg 
+= bug_reports_message() 
 738         super(ExtractorError
, self
).__init
__(msg
) 
 741         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 743         self
.video_id 
= video_id
 
 745     def format_traceback(self
): 
 746         if self
.traceback 
is None: 
 748         return ''.join(traceback
.format_tb(self
.traceback
)) 
 751 class UnsupportedError(ExtractorError
): 
 752     def __init__(self
, url
): 
 753         super(UnsupportedError
, self
).__init
__( 
 754             'Unsupported URL: %s' % url
, expected
=True) 
 758 class RegexNotFoundError(ExtractorError
): 
 759     """Error when a regex didn't match""" 
 763 class GeoRestrictedError(ExtractorError
): 
 764     """Geographic restriction Error exception. 
 766     This exception may be thrown when a video is not available from your 
 767     geographic location due to geographic restrictions imposed by a website. 
 769     def __init__(self
, msg
, countries
=None): 
 770         super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True) 
 772         self
.countries 
= countries
 
 775 class DownloadError(YoutubeDLError
): 
 776     """Download Error exception. 
 778     This exception may be thrown by FileDownloader objects if they are not 
 779     configured to continue on errors. They will contain the appropriate 
 783     def __init__(self
, msg
, exc_info
=None): 
 784         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ 
 785         super(DownloadError
, self
).__init
__(msg
) 
 786         self
.exc_info 
= exc_info
 
 789 class SameFileError(YoutubeDLError
): 
 790     """Same File exception. 
 792     This exception will be thrown by FileDownloader objects if they detect 
 793     multiple files would have to be downloaded to the same file on disk. 
 798 class PostProcessingError(YoutubeDLError
): 
 799     """Post Processing exception. 
 801     This exception may be raised by PostProcessor's .run() method to 
 802     indicate an error in the postprocessing task. 
 805     def __init__(self
, msg
): 
 806         super(PostProcessingError
, self
).__init
__(msg
) 
 810 class MaxDownloadsReached(YoutubeDLError
): 
 811     """ --max-downloads limit has been reached. """ 
 815 class UnavailableVideoError(YoutubeDLError
): 
 816     """Unavailable Format exception. 
 818     This exception will be thrown when a video is requested 
 819     in a format that is not available for that video. 
 824 class ContentTooShortError(YoutubeDLError
): 
 825     """Content Too Short exception. 
 827     This exception may be raised by FileDownloader objects when a file they 
 828     download is too small for what the server announced first, indicating 
 829     the connection was probably interrupted. 
 832     def __init__(self
, downloaded
, expected
): 
 833         super(ContentTooShortError
, self
).__init
__( 
 834             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
) 
 837         self
.downloaded 
= downloaded
 
 838         self
.expected 
= expected
 
 841 class XAttrMetadataError(YoutubeDLError
): 
 842     def __init__(self
, code
=None, msg
='Unknown error'): 
 843         super(XAttrMetadataError
, self
).__init
__(msg
) 
 847         # Parsing code and msg 
 848         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 849                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 850             self
.reason 
= 'NO_SPACE' 
 851         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 852             self
.reason 
= 'VALUE_TOO_LONG' 
 854             self
.reason 
= 'NOT_SUPPORTED' 
 857 class XAttrUnavailableError(YoutubeDLError
): 
 861 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 862     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 863     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 864     # https://github.com/rg3/youtube-dl/issues/6727) 
 865     if sys
.version_info 
< (3, 0): 
 866         kwargs
[b
'strict'] = True 
 867     hc 
= http_class(*args
, **kwargs
) 
 868     source_address 
= ydl_handler
._params
.get('source_address') 
 869     if source_address 
is not None: 
 870         sa 
= (source_address
, 0) 
 871         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 872             hc
.source_address 
= sa
 
 874             def _hc_connect(self
, *args
, **kwargs
): 
 875                 sock 
= compat_socket_create_connection( 
 876                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 878                     self
.sock 
= ssl
.wrap_socket( 
 879                         sock
, self
.key_file
, self
.cert_file
, 
 880                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 883             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 888 def handle_youtubedl_headers(headers
): 
 889     filtered_headers 
= headers
 
 891     if 'Youtubedl-no-compression' in filtered_headers
: 
 892         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 893         del filtered_headers
['Youtubedl-no-compression'] 
 895     return filtered_headers
 
 898 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 899     """Handler for HTTP requests and responses. 
 901     This class, when installed with an OpenerDirector, automatically adds 
 902     the standard headers to every HTTP request and handles gzipped and 
 903     deflated responses from web servers. If compression is to be avoided in 
 904     a particular request, the original request in the program code only has 
 905     to include the HTTP header "Youtubedl-no-compression", which will be 
 906     removed before making the real request. 
 908     Part of this code was copied from: 
 910     http://techknack.net/python-urllib2-handlers/ 
 912     Andrew Rowls, the author of that code, agreed to release it to the 
 916     def __init__(self
, params
, *args
, **kwargs
): 
 917         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 918         self
._params 
= params
 
 920     def http_open(self
, req
): 
 921         conn_class 
= compat_http_client
.HTTPConnection
 
 923         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 925             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 926             del req
.headers
['Ytdl-socks-proxy'] 
 928         return self
.do_open(functools
.partial( 
 929             _create_http_connection
, self
, conn_class
, False), 
 935             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 937             return zlib
.decompress(data
) 
 939     def http_request(self
, req
): 
 940         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 941         # always respected by websites, some tend to give out URLs with non percent-encoded 
 942         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 943         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 944         # To work around aforementioned issue we will replace request's original URL with 
 945         # percent-encoded one 
 946         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 947         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 948         url 
= req
.get_full_url() 
 949         url_escaped 
= escape_url(url
) 
 951         # Substitute URL if any change after escaping 
 952         if url 
!= url_escaped
: 
 953             req 
= update_Request(req
, url
=url_escaped
) 
 955         for h
, v 
in std_headers
.items(): 
 956             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 957             # The dict keys are capitalized because of this bug by urllib 
 958             if h
.capitalize() not in req
.headers
: 
 961         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 963         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 964             # Python 2.6 is brain-dead when it comes to fragments 
 965             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 966             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 970     def http_response(self
, req
, resp
): 
 973         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 974             content 
= resp
.read() 
 975             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 977                 uncompressed 
= io
.BytesIO(gz
.read()) 
 978             except IOError as original_ioerror
: 
 979                 # There may be junk add the end of the file 
 980                 # See http://stackoverflow.com/q/4928560/35070 for details 
 981                 for i 
in range(1, 1024): 
 983                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 984                         uncompressed 
= io
.BytesIO(gz
.read()) 
 989                     raise original_ioerror
 
 990             resp 
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 991             resp
.msg 
= old_resp
.msg
 
 992             del resp
.headers
['Content-encoding'] 
 994         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 995             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
 996             resp 
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 997             resp
.msg 
= old_resp
.msg
 
 998             del resp
.headers
['Content-encoding'] 
 999         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
1000         # https://github.com/rg3/youtube-dl/issues/6457). 
1001         if 300 <= resp
.code 
< 400: 
1002             location 
= resp
.headers
.get('Location') 
1004                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
1005                 if sys
.version_info 
>= (3, 0): 
1006                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
1008                     location 
= location
.decode('utf-8') 
1009                 location_escaped 
= escape_url(location
) 
1010                 if location 
!= location_escaped
: 
1011                     del resp
.headers
['Location'] 
1012                     if sys
.version_info 
< (3, 0): 
1013                         location_escaped 
= location_escaped
.encode('utf-8') 
1014                     resp
.headers
['Location'] = location_escaped
 
1017     https_request 
= http_request
 
1018     https_response 
= http_response
 
1021 def make_socks_conn_class(base_class
, socks_proxy
): 
1022     assert issubclass(base_class
, ( 
1023         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
1025     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
1026     if url_components
.scheme
.lower() == 'socks5': 
1027         socks_type 
= ProxyType
.SOCKS5
 
1028     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
1029         socks_type 
= ProxyType
.SOCKS4
 
1030     elif url_components
.scheme
.lower() == 'socks4a': 
1031         socks_type 
= ProxyType
.SOCKS4A
 
1033     def unquote_if_non_empty(s
): 
1036         return compat_urllib_parse_unquote_plus(s
) 
1040         url_components
.hostname
, url_components
.port 
or 1080, 
1042         unquote_if_non_empty(url_components
.username
), 
1043         unquote_if_non_empty(url_components
.password
), 
1046     class SocksConnection(base_class
): 
1048             self
.sock 
= sockssocket() 
1049             self
.sock
.setproxy(*proxy_args
) 
1050             if type(self
.timeout
) in (int, float): 
1051                 self
.sock
.settimeout(self
.timeout
) 
1052             self
.sock
.connect((self
.host
, self
.port
)) 
1054             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1055                 if hasattr(self
, '_context'):  # Python > 2.6 
1056                     self
.sock 
= self
._context
.wrap_socket( 
1057                         self
.sock
, server_hostname
=self
.host
) 
1059                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1061     return SocksConnection
 
1064 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
1065     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
1066         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
1067         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
1068         self
._params 
= params
 
1070     def https_open(self
, req
): 
1072         conn_class 
= self
._https
_conn
_class
 
1074         if hasattr(self
, '_context'):  # python > 2.6 
1075             kwargs
['context'] = self
._context
 
1076         if hasattr(self
, '_check_hostname'):  # python 3.x 
1077             kwargs
['check_hostname'] = self
._check
_hostname
 
1079         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
1081             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1082             del req
.headers
['Ytdl-socks-proxy'] 
1084         return self
.do_open(functools
.partial( 
1085             _create_http_connection
, self
, conn_class
, True), 
1089 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
1090     def __init__(self
, cookiejar
=None): 
1091         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
1093     def http_response(self
, request
, response
): 
1094         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
1095         # characters in Set-Cookie HTTP header of last response (see 
1096         # https://github.com/rg3/youtube-dl/issues/6769). 
1097         # In order to at least prevent crashing we will percent encode Set-Cookie 
1098         # header before HTTPCookieProcessor starts processing it. 
1099         # if sys.version_info < (3, 0) and response.headers: 
1100         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
1101         #         set_cookie = response.headers.get(set_cookie_header) 
1103         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
1104         #             if set_cookie != set_cookie_escaped: 
1105         #                 del response.headers[set_cookie_header] 
1106         #                 response.headers[set_cookie_header] = set_cookie_escaped 
1107         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
1109     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
1110     https_response 
= http_response
 
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns a (timezone, date_str) pair where timezone is a
    datetime.timedelta (zero when no offset was recognized) and
    date_str has the recognized suffix stripped.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        # No timezone suffix at all: leave the string untouched.
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign_ch = m.group('sign')
    if not sign_ch:
        # A bare 'Z' suffix means UTC (zero offset).
        return datetime.timedelta(), date_str
    factor = 1 if sign_ch == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(m.group('hours')),
        minutes=factor * int(m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # Fractional seconds are not part of the supported format; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        moment = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        # Unparsable input: fall through and return None implicitly.
        return None
    return calendar.timegm(moment.timetuple())
def date_formats(day_first=True):
    """Pick the strptime format list matching the day/month ordering."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    upload_date = None
    # Commas only get in the way of the format strings below.
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NOTE: deliberately no break — a later matching format overwrites an
    # earlier one, mirroring the historical behavior of this helper.
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser from the email package.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # 12-hour clock values marked PM need a 12 hour shift after parsing.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    for fmt in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, fmt) - timezone + datetime.timedelta(hours=pm_delta)
        except ValueError:
            continue
        return calendar.timegm(parsed.timetuple())

    # Last resort: the RFC 2822 parser from the email package.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*; fall back to *default_ext*."""
    if url is None:
        return default_ext
    # Everything after the last dot, ignoring any query string.
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name of the form <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return '{0}.{1}.{2}'.format(base, sub_lang, sub_format)
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        # Plain YYYYMMDD date.
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation?
    if unit == 'month':
        unit = 'day'
        amount *= 30
    elif unit == 'year':
        unit = 'day'
        amount *= 365
    # timedelta keyword arguments are plural ('days', 'weeks', ...).
    return today + datetime.timedelta(**{unit + 's': amount})
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        # Anything not in YYYYMMDD form passes through untouched.
        return date_str
    return '-'.join(match.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest representable range.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
1297 def platform_name(): 
1298     """ Returns the platform name as a compat_str """ 
1299     res 
= platform
.platform() 
1300     if isinstance(res
, bytes): 
1301         res 
= res
.decode(preferredencoding()) 
1303     assert isinstance(res
, compat_str
) 
1307 def _windows_write_string(s
, out
): 
1308     """ Returns True if the string was written using special methods, 
1309     False if it has yet to be written out.""" 
1310     # Adapted from http://stackoverflow.com/a/3259271/35070 
1313     import ctypes
.wintypes
 
1321         fileno 
= out
.fileno() 
1322     except AttributeError: 
1323         # If the output stream doesn't have a fileno, it's virtual 
1325     except io
.UnsupportedOperation
: 
1326         # Some strange Windows pseudo files? 
1328     if fileno 
not in WIN_OUTPUT_IDS
: 
1331     GetStdHandle 
= ctypes
.WINFUNCTYPE( 
1332         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1333         (b
'GetStdHandle', ctypes
.windll
.kernel32
)) 
1334     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1336     WriteConsoleW 
= ctypes
.WINFUNCTYPE( 
1337         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1338         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1339         ctypes
.wintypes
.LPVOID
)((b
'WriteConsoleW', ctypes
.windll
.kernel32
)) 
1340     written 
= ctypes
.wintypes
.DWORD(0) 
1342     GetFileType 
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
'GetFileType', ctypes
.windll
.kernel32
)) 
1343     FILE_TYPE_CHAR 
= 0x0002 
1344     FILE_TYPE_REMOTE 
= 0x8000 
1345     GetConsoleMode 
= ctypes
.WINFUNCTYPE( 
1346         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1347         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1348         (b
'GetConsoleMode', ctypes
.windll
.kernel32
)) 
1349     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1351     def not_a_console(handle
): 
1352         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1354         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1355                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1357     if not_a_console(h
): 
1360     def next_nonbmp_pos(s
): 
1362             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1363         except StopIteration: 
1367         count 
= min(next_nonbmp_pos(s
), 1024) 
1369         ret 
= WriteConsoleW( 
1370             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1372             raise OSError('Failed to write string') 
1373         if not count
:  # We just wrote a non-BMP character 
1374             assert written
.value 
== 2 
1377             assert written
.value 
> 0 
1378             s 
= s
[written
.value
:] 
1382 def write_string(s
, out
=None, encoding
=None): 
1385     assert type(s
) == compat_str
 
1387     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
1388         if _windows_write_string(s
, out
): 
1391     if ('b' in getattr(out
, 'mode', '') or 
1392             sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
1393         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
1395     elif hasattr(out
, 'buffer'): 
1396         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
1397         byt 
= s
.encode(enc
, 'ignore') 
1398         out
.buffer.write(byt
) 
1404 def bytes_to_intlist(bs
): 
1407     if isinstance(bs
[0], int):  # Python 3 
1410         return [ord(c
) for c 
in bs
] 
1413 def intlist_to_bytes(xs
): 
1416     return compat_struct_pack('%dB' % len(xs
), *xs
) 
1419 # Cross-platform file locking 
1420 if sys
.platform 
== 'win32': 
1421     import ctypes
.wintypes
 
1424     class OVERLAPPED(ctypes
.Structure
): 
1426             ('Internal', ctypes
.wintypes
.LPVOID
), 
1427             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1428             ('Offset', ctypes
.wintypes
.DWORD
), 
1429             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1430             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1433     kernel32 
= ctypes
.windll
.kernel32
 
1434     LockFileEx 
= kernel32
.LockFileEx
 
1435     LockFileEx
.argtypes 
= [ 
1436         ctypes
.wintypes
.HANDLE
,     # hFile 
1437         ctypes
.wintypes
.DWORD
,      # dwFlags 
1438         ctypes
.wintypes
.DWORD
,      # dwReserved 
1439         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1440         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1441         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1443     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1444     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1445     UnlockFileEx
.argtypes 
= [ 
1446         ctypes
.wintypes
.HANDLE
,     # hFile 
1447         ctypes
.wintypes
.DWORD
,      # dwReserved 
1448         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1449         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1450         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1452     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1453     whole_low 
= 0xffffffff 
1454     whole_high 
= 0x7fffffff 
1456     def _lock_file(f
, exclusive
): 
1457         overlapped 
= OVERLAPPED() 
1458         overlapped
.Offset 
= 0 
1459         overlapped
.OffsetHigh 
= 0 
1460         overlapped
.hEvent 
= 0 
1461         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1462         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1463         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1464                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1465             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1467     def _unlock_file(f
): 
1468         assert f
._lock
_file
_overlapped
_p
 
1469         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1470         if not UnlockFileEx(handle
, 0, 
1471                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1472             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1475     # Some platforms, such as Jython, is missing fcntl 
1479         def _lock_file(f
, exclusive
): 
1480             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1482         def _unlock_file(f
): 
1483             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1485         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1487         def _lock_file(f
, exclusive
): 
1488             raise IOError(UNSUPPORTED_MSG
) 
1490         def _unlock_file(f
): 
1491             raise IOError(UNSUPPORTED_MSG
) 
1494 class locked_file(object): 
1495     def __init__(self
, filename
, mode
, encoding
=None): 
1496         assert mode 
in ['r', 'a', 'w'] 
1497         self
.f 
= io
.open(filename
, mode
, encoding
=encoding
) 
1500     def __enter__(self
): 
1501         exclusive 
= self
.mode 
!= 'r' 
1503             _lock_file(self
.f
, exclusive
) 
1509     def __exit__(self
, etype
, value
, traceback
): 
1511             _unlock_file(self
.f
) 
1518     def write(self
, *args
): 
1519         return self
.f
.write(*args
) 
1521     def read(self
, *args
): 
1522         return self
.f
.read(*args
) 
1525 def get_filesystem_encoding(): 
1526     encoding 
= sys
.getfilesystemencoding() 
1527     return encoding 
if encoding 
is not None else 'utf-8' 
1530 def shell_quote(args
): 
1532     encoding 
= get_filesystem_encoding() 
1534         if isinstance(a
, bytes): 
1535             # We may get a filename encoded with 'encodeFilename' 
1536             a 
= a
.decode(encoding
) 
1537         quoted_args
.append(compat_shlex_quote(a
)) 
1538     return ' '.join(quoted_args
) 
1541 def smuggle_url(url
, data
): 
1542     """ Pass additional data in a URL for internal use. """ 
1544     url
, idata 
= unsmuggle_url(url
, {}) 
1546     sdata 
= compat_urllib_parse_urlencode( 
1547         {'__youtubedl_smuggle': json
.dumps(data
)}) 
1548     return url 
+ '#' + sdata
 
1551 def unsmuggle_url(smug_url
, default
=None): 
1552     if '#__youtubedl_smuggle' not in smug_url
: 
1553         return smug_url
, default
 
1554     url
, _
, sdata 
= smug_url
.rpartition('#') 
1555     jsond 
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0] 
1556     data 
= json
.loads(jsond
) 
def format_bytes(bytes):
    """Render a byte count as a human-readable string, e.g. '1.00KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log(0) is undefined; zero bytes stays in the plain 'B' bucket.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' in *s* using *unit_table* ({unit: multiplier})."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # European-style decimal commas are accepted as well.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
1585 def parse_filesize(s
): 
1589     # The lower-case forms are of course incorrect and unofficial, 
1590     # but we support those too 
1607         'megabytes': 1000 ** 2, 
1608         'mebibytes': 1024 ** 2, 
1614         'gigabytes': 1000 ** 3, 
1615         'gibibytes': 1024 ** 3, 
1621         'terabytes': 1000 ** 4, 
1622         'tebibytes': 1024 ** 4, 
1628         'petabytes': 1000 ** 5, 
1629         'pebibytes': 1024 ** 5, 
1635         'exabytes': 1000 ** 6, 
1636         'exbibytes': 1024 ** 6, 
1642         'zettabytes': 1000 ** 7, 
1643         'zebibytes': 1024 ** 7, 
1649         'yottabytes': 1000 ** 8, 
1650         'yobibytes': 1024 ** 8, 
1653     return lookup_unit_table(_UNIT_TABLE
, s
) 
1662     if re
.match(r
'^[\d,.]+$', s
): 
1663         return str_to_int(s
) 
1674     return lookup_unit_table(_UNIT_TABLE
, s
) 
1677 def month_by_name(name
, lang
='en'): 
1678     """ Return the number of a month by (locale-independently) English name """ 
1680     month_names 
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en']) 
1683         return month_names
.index(name
) + 1 
1688 def month_by_abbreviation(abbrev
): 
1689     """ Return the number of a month by (locale-independently) English 
1693         return [s
[:3] for s 
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1 
1698 def fix_xml_ampersands(xml_str
): 
1699     """Replace all the '&' by '&' in XML""" 
1701         r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', 
1706 def setproctitle(title
): 
1707     assert isinstance(title
, compat_str
) 
1709     # ctypes in Jython is not complete 
1710     # http://bugs.jython.org/issue2148 
1711     if sys
.platform
.startswith('java'): 
1715         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
1719         # LoadLibrary in Windows Python 2.7.13 only expects 
1720         # a bytestring, but since unicode_literals turns 
1721         # every string into a unicode string, it fails. 
1723     title_bytes 
= title
.encode('utf-8') 
1724     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1725     buf
.value 
= title_bytes
 
1727         libc
.prctl(15, buf
, 0, 0, 0) 
1728     except AttributeError: 
1729         return  # Strange libc, just skip this 
def remove_start(s, start):
    """Strip prefix *start* from *s*; None and non-matching input pass through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip suffix *end* from *s*; None and non-matching input pass through."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Drop one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'"):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def url_basename(url):
    """Return the last path segment of *url* (query/fragment excluded)."""
    parsed_path = compat_urlparse.urlparse(url).path
    return parsed_path.strip('/').split('/')[-1]
1755     return re
.match(r
'https?://[^?#&]+/', url
).group() 
1758 def urljoin(base
, path
): 
1759     if isinstance(path
, bytes): 
1760         path 
= path
.decode('utf-8') 
1761     if not isinstance(path
, compat_str
) or not path
: 
1763     if re
.match(r
'^(?:https?:)?//', path
): 
1765     if isinstance(base
, bytes): 
1766         base 
= base
.decode('utf-8') 
1767     if not isinstance(base
, compat_str
) or not re
.match( 
1768             r
'^(?:https?:)?//', base
): 
1770     return compat_urlparse
.urljoin(base
, path
) 
1773 class HEADRequest(compat_urllib_request
.Request
): 
1774     def get_method(self
): 
1778 class PUTRequest(compat_urllib_request
.Request
): 
1779     def get_method(self
): 
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* (optionally via attribute *get_attr*) to an int scaled by
    invscale/scale; return *default* on None/empty/unparsable input."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, or return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Strip thousands separators ',' '.', and an explicit '+' sign.
    return int(re.sub(r'[,\.\+]', '', int_str))
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float scaled by invscale/scale; *default* on failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v):
    """Like str.strip, but passes None through untouched."""
    if v is None:
        return None
    return v.strip()
1826 def parse_duration(s
): 
1827     if not isinstance(s
, compat_basestring
): 
1832     days
, hours
, mins
, secs
, ms 
= [None] * 5 
1833     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
) 
1835         days
, hours
, mins
, secs
, ms 
= m
.groups() 
1840                     [0-9]+\s*y(?:ears?)?\s* 
1843                     [0-9]+\s*m(?:onths?)?\s* 
1846                     [0-9]+\s*w(?:eeks?)?\s* 
1849                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
1853                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
1856                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
1859                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
1862             days
, hours
, mins
, secs
, ms 
= m
.groups() 
1864             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
) 
1866                 hours
, mins 
= m
.groups() 
1872         duration 
+= float(secs
) 
1874         duration 
+= float(mins
) * 60 
1876         duration 
+= float(hours
) * 60 * 60 
1878         duration 
+= float(days
) * 24 * 60 * 60 
1880         duration 
+= float(ms
) 
1884 def prepend_extension(filename
, ext
, expected_real_ext
=None): 
1885     name
, real_ext 
= os
.path
.splitext(filename
) 
1887         '{0}.{1}{2}'.format(name
, ext
, real_ext
) 
1888         if not expected_real_ext 
or real_ext
[1:] == expected_real_ext
 
1889         else '{0}.{1}'.format(filename
, ext
)) 
1892 def replace_extension(filename
, ext
, expected_real_ext
=None): 
1893     name
, real_ext 
= os
.path
.splitext(filename
) 
1894     return '{0}.{1}'.format( 
1895         name 
if not expected_real_ext 
or real_ext
[1:] == expected_real_ext 
else filename
, 
1899 def check_executable(exe
, args
=[]): 
1900     """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
1901     args can be a list of arguments for a short output (like -version) """ 
1903         subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate() 
1909 def get_exe_version(exe
, args
=['--version'], 
1910                     version_re
=None, unrecognized
='present'): 
1911     """ Returns the version of the specified executable, 
1912     or False if the executable is not present """ 
1914         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers 
1915         # SIGTTOU if youtube-dl is run in the background. 
1916         # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656 
1917         out
, _ 
= subprocess
.Popen( 
1918             [encodeArgument(exe
)] + args
, 
1919             stdin
=subprocess
.PIPE
, 
1920             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1923     if isinstance(out
, bytes):  # Python 2.x 
1924         out 
= out
.decode('ascii', 'ignore') 
1925     return detect_exe_version(out
, version_re
, unrecognized
) 
1928 def detect_exe_version(output
, version_re
=None, unrecognized
='present'): 
1929     assert isinstance(output
, compat_str
) 
1930     if version_re 
is None: 
1931         version_re 
= r
'version\s+([-0-9._a-zA-Z]+)' 
1932     m 
= re
.search(version_re
, output
) 
1939 class PagedList(object): 
1941         # This is only useful for tests 
1942         return len(self
.getslice()) 
1945 class OnDemandPagedList(PagedList
): 
1946     def __init__(self
, pagefunc
, pagesize
, use_cache
=True): 
1947         self
._pagefunc 
= pagefunc
 
1948         self
._pagesize 
= pagesize
 
1949         self
._use
_cache 
= use_cache
 
1953     def getslice(self
, start
=0, end
=None): 
1955         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
1956             firstid 
= pagenum 
* self
._pagesize
 
1957             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
1958             if start 
>= nextfirstid
: 
1963                 page_results 
= self
._cache
.get(pagenum
) 
1964             if page_results 
is None: 
1965                 page_results 
= list(self
._pagefunc
(pagenum
)) 
1967                 self
._cache
[pagenum
] = page_results
 
1970                 start 
% self
._pagesize
 
1971                 if firstid 
<= start 
< nextfirstid
 
1975                 ((end 
- 1) % self
._pagesize
) + 1 
1976                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
1979             if startv 
!= 0 or endv 
is not None: 
1980                 page_results 
= page_results
[startv
:endv
] 
1981             res
.extend(page_results
) 
1983             # A little optimization - if current page is not "full", ie. does 
1984             # not contain page_size videos then we can assume that this page 
1985             # is the last one - there are no more ids on further pages - 
1986             # i.e. no need to query again. 
1987             if len(page_results
) + startv 
< self
._pagesize
: 
1990             # If we got the whole page, but the next page is not interesting, 
1991             # break out early as well 
1992             if end 
== nextfirstid
: 
1997 class InAdvancePagedList(PagedList
): 
1998     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
1999         self
._pagefunc 
= pagefunc
 
2000         self
._pagecount 
= pagecount
 
2001         self
._pagesize 
= pagesize
 
2003     def getslice(self
, start
=0, end
=None): 
2005         start_page 
= start 
// self
._pagesize
 
2007             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
2008         skip_elems 
= start 
- start_page 
* self
._pagesize
 
2009         only_more 
= None if end 
is None else end 
- start
 
2010         for pagenum 
in range(start_page
, end_page
): 
2011             page 
= list(self
._pagefunc
(pagenum
)) 
2013                 page 
= page
[skip_elems
:] 
2015             if only_more 
is not None: 
2016                 if len(page
) < only_more
: 
2017                     only_more 
-= len(page
) 
2019                     page 
= page
[:only_more
] 
2026 def uppercase_escape(s
): 
2027     unicode_escape 
= codecs
.getdecoder('unicode_escape') 
2029         r
'\\U[0-9a-fA-F]{8}', 
2030         lambda m
: unicode_escape(m
.group(0))[0], 
2034 def lowercase_escape(s
): 
2035     unicode_escape 
= codecs
.getdecoder('unicode_escape') 
2037         r
'\\u[0-9a-fA-F]{4}', 
2038         lambda m
: unicode_escape(m
.group(0))[0], 
2042 def escape_rfc3986(s
): 
2043     """Escape non-ASCII characters as suggested by RFC 3986""" 
2044     if sys
.version_info 
< (3, 0) and isinstance(s
, compat_str
): 
2045         s 
= s
.encode('utf-8') 
2046     return compat_urllib_parse
.quote(s
, b
"%/;:@&=+$,!~*'()?#[]") 
2049 def escape_url(url
): 
2050     """Escape URL as suggested by RFC 3986""" 
2051     url_parsed 
= compat_urllib_parse_urlparse(url
) 
2052     return url_parsed
._replace
( 
2053         netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'), 
2054         path
=escape_rfc3986(url_parsed
.path
), 
2055         params
=escape_rfc3986(url_parsed
.params
), 
2056         query
=escape_rfc3986(url_parsed
.query
), 
2057         fragment
=escape_rfc3986(url_parsed
.fragment
) 
2061 def read_batch_urls(batch_fd
): 
2063         if not isinstance(url
, compat_str
): 
2064             url 
= url
.decode('utf-8', 'replace') 
2065         BOM_UTF8 
= '\xef\xbb\xbf' 
2066         if url
.startswith(BOM_UTF8
): 
2067             url 
= url
[len(BOM_UTF8
):] 
2069         if url
.startswith(('#', ';', ']')): 
2073     with contextlib
.closing(batch_fd
) as fd
: 
2074         return [url 
for url 
in map(fixup
, fd
) if url
] 
2077 def urlencode_postdata(*args
, **kargs
): 
2078     return compat_urllib_parse_urlencode(*args
, **kargs
).encode('ascii') 
2081 def update_url_query(url
, query
): 
2084     parsed_url 
= compat_urlparse
.urlparse(url
) 
2085     qs 
= compat_parse_qs(parsed_url
.query
) 
2087     return compat_urlparse
.urlunparse(parsed_url
._replace
( 
2088         query
=compat_urllib_parse_urlencode(qs
, True))) 
2091 def update_Request(req
, url
=None, data
=None, headers
={}, query
={}): 
2092     req_headers 
= req
.headers
.copy() 
2093     req_headers
.update(headers
) 
2094     req_data 
= data 
or req
.data
 
2095     req_url 
= update_url_query(url 
or req
.get_full_url(), query
) 
2096     req_get_method 
= req
.get_method() 
2097     if req_get_method 
== 'HEAD': 
2098         req_type 
= HEADRequest
 
2099     elif req_get_method 
== 'PUT': 
2100         req_type 
= PUTRequest
 
2102         req_type 
= compat_urllib_request
.Request
 
2104         req_url
, data
=req_data
, headers
=req_headers
, 
2105         origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
) 
2106     if hasattr(req
, 'timeout'): 
2107         new_req
.timeout 
= req
.timeout
 
2111 def _multipart_encode_impl(data
, boundary
): 
2112     content_type 
= 'multipart/form-data; boundary=%s' % boundary
 
2115     for k
, v 
in data
.items(): 
2116         out 
+= b
'--' + boundary
.encode('ascii') + b
'\r\n' 
2117         if isinstance(k
, compat_str
): 
2118             k 
= k
.encode('utf-8') 
2119         if isinstance(v
, compat_str
): 
2120             v 
= v
.encode('utf-8') 
2121         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 
2122         # suggests sending UTF-8 directly. Firefox sends UTF-8, too 
2123         content 
= b
'Content-Disposition: form-data; name="' + k 
+ b
'"\r\n\r\n' + v 
+ b
'\r\n' 
2124         if boundary
.encode('ascii') in content
: 
2125             raise ValueError('Boundary overlaps with data') 
2128     out 
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n' 
2130     return out
, content_type
 
2133 def multipart_encode(data
, boundary
=None): 
2135     Encode a dict to RFC 7578-compliant form-data 
2138         A dict where keys and values can be either Unicode or bytes-like 
2141         If specified a Unicode object, it's used as the boundary. Otherwise 
2142         a random boundary is generated. 
2144     Reference: https://tools.ietf.org/html/rfc7578 
2146     has_specified_boundary 
= boundary 
is not None 
2149         if boundary 
is None: 
2150             boundary 
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff)) 
2153             out
, content_type 
= _multipart_encode_impl(data
, boundary
) 
2156             if has_specified_boundary
: 
2160     return out
, content_type
 
2163 def dict_get(d
, key_or_keys
, default
=None, skip_false_values
=True): 
2164     if isinstance(key_or_keys
, (list, tuple)): 
2165         for key 
in key_or_keys
: 
2166             if key 
not in d 
or d
[key
] is None or skip_false_values 
and not d
[key
]: 
2170     return d
.get(key_or_keys
, default
) 
2173 def try_get(src
, getter
, expected_type
=None): 
2174     if not isinstance(getter
, (list, tuple)): 
2179         except (AttributeError, KeyError, TypeError, IndexError): 
2182             if expected_type 
is None or isinstance(v
, expected_type
): 
2186 def encode_compat_str(string
, encoding
=preferredencoding(), errors
='strict'): 
2187     return string 
if isinstance(string
, compat_str
) else compat_str(string
, encoding
, errors
) 
2199 TV_PARENTAL_GUIDELINES 
= { 
2209 def parse_age_limit(s
): 
2211         return s 
if 0 <= s 
<= 21 else None 
2212     if not isinstance(s
, compat_basestring
): 
2214     m 
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
) 
2216         return int(m
.group('age')) 
2218         return US_RATINGS
[s
] 
2219     return TV_PARENTAL_GUIDELINES
.get(s
) 
2222 def strip_jsonp(code
): 
2225             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+) 
2226             (?:\s*&&\s*(?P=func_name))? 
2227             \s*\(\s*(?P<callback_data>.*)\);? 
2228             \s*?(?://[^\n]*)*$''', 
2229         r
'\g<callback_data>', code
) 
2232 def js_to_json(code
): 
2233     COMMENT_RE 
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*' 
2234     SKIP_RE 
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
) 
2236         (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16), 
2237         (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8), 
2242         if v 
in ('true', 'false', 'null'): 
2244         elif v
.startswith('/*') or v
.startswith('//') or v 
== ',': 
2247         if v
[0] in ("'", '"'): 
2248             v 
= re
.sub(r
'(?s)\\.|"', lambda m
: { 
2253             }.get(m
.group(0), m
.group(0)), v
[1:-1]) 
2255         for regex
, base 
in INTEGER_TABLE
: 
2256             im 
= re
.match(regex
, v
) 
2258                 i 
= int(im
.group(1), base
) 
2259                 return '"%d":' % i 
if v
.endswith(':') else '%d' % i
 
2263     return re
.sub(r
'''(?sx) 
2264         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| 
2265         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| 
2266         {comment}|,(?={skip}[\]}}])| 
2267         [a-zA-Z_][.a-zA-Z_0-9]*| 
2268         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| 
2270         '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
) 
2273 def qualities(quality_ids
): 
2274     """ Get a numeric quality value out of a list of possible values """ 
2277             return quality_ids
.index(qid
) 
2283 DEFAULT_OUTTMPL 
= '%(title)s-%(id)s.%(ext)s' 
2286 def limit_length(s
, length
): 
2287     """ Add ellipses to overly long strings """ 
2292         return s
[:length 
- len(ELLIPSES
)] + ELLIPSES
 
def version_tuple(v):
    """Split a version string on '.' or '-' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """True when *version* is older than *limit*; unparsable or missing
    versions are treated as new or old according to *assume_new*."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        # Non-numeric component somewhere in either string.
        return fallback
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Only the zipball deployment (loaded via zipimport) and the frozen
    # executable (py2exe sets sys.frozen) support self-updating.
    loader = globals().get('__loader__')
    return isinstance(loader, zipimporter) or hasattr(sys, 'frozen')
def args_to_str(args):
    """Build a short, shell-quoted one-line representation of a
    subprocess command (for logging/debugging)."""
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Turn an exception into a text string that is safe on both Pythons."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
def mimetype2ext(mt):
    """Map a MIME type to a file extension; None passes through.

    Unknown subtypes fall back on the (lower-cased) subtype itself.
    """
    if mt is None:
        return None

    # Full-type special cases first.
    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    # Otherwise decide by subtype, ignoring any ';'-parameters.
    _, _, res = mt.rpartition('/')
    res = res.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
def parse_codecs(codecs_str):
    """Split an RFC 6381 ``codecs=`` string into vcodec/acodec fields.

    Returns {} when nothing usable is found; otherwise a dict with
    'vcodec' and 'acodec' keys ('none' when absent).
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [part.strip()
                    for part in codecs_str.strip().strip(',').split(',')
                    if part.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: fall back on positional interpretation.
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
        elif len(split_codecs) == 1:
            return {
                'vcodec': 'none',
                'acodec': split_codecs[0],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for an HTTP response.

    Prefers the Content-Disposition filename; falls back on the
    Content-Type MIME mapping.
    """
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Encode *data* (bytes) as an RFC 2397 base64 data: URI."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or the content is available for everyone.
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Honour a Unicode BOM when decoding; longer BOMs come first so that
    # e.g. UTF-32-LE is not mistaken for UTF-16-LE.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    s = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if s is None:
        s = first_bytes.decode('utf-8', 'replace')

    # HTML-ish content starts with a tag (possibly after whitespace).
    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Best-effort guess of the download protocol for an info dict."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Streaming schemes are recognizable from the URL prefix alone.
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    # Manifest formats are recognizable from the extension.
    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    if ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Each column is padded to its widest cell plus one separating space;
    # the last column is left unpadded.
    widths = []
    for col in zip(*table):
        widths.append(max(len(compat_str(v)) for v in col))
    parts = ['%-' + compat_str(w + 1) + 's' for w in widths[:-1]]
    format_str = ' '.join(parts) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
def _match_one(filter_part, dct):
    """Evaluate one --match-filter clause (e.g. 'duration > 60',
    'uploader = "foo"', '!is_live') against *dct*.

    Returns a truthy/falsy result; raises ValueError on a malformed clause.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/rg3/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside quoted values.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try filesize suffixes (K, M, KiB, ...).
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # A trailing '?' makes a missing field pass the filter.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # All '&'-separated clauses must hold.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
def match_filter_func(filter_str):
    """Build a --match-filter callable: returns None when the video passes,
    or a human-readable skip message otherwise."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds (float).

    Returns None for empty or unrecognized input.
    """
    if not time_expr:
        return

    # Plain offset, e.g. '4.5' or '4.5s'.
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time 'HH:MM:SS(.fff)'; a ':fff' suffix is read as a fraction.
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, secs = mobj.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(secs.replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates each component toward zero.
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML/TTAF namespaces are rewritten to the current ones so a
    # single set of XPath expressions works for all variants.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # NOTE(review): these are shared class-level mutables; each
        # parse_node() uses a fresh instance, so state does not leak in
        # practice, but confirm before reusing instances.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip styling already applied by an enclosing element.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the <p> element and feed it through a SAX-style
        # parser so styling can be converted to SRT font tags.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat while a parent style was not yet seen.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id')
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styles attached to <body>/<div> become the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param):
    """Return [command_option, value] when params[param] is set, else []."""
    value = params.get(param)
    if value:
        # Normalize non-string values (e.g. numbers) to text.
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Translate a boolean setting into a CLI flag.

    With *separator*, emit a single '--opt<sep>value' token; otherwise
    emit the option and value as two tokens. Unset -> [].
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit *command_option* (with no value) when params[param] equals
    *expected_value*; otherwise []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra CLI args stored at params[param].

    Falls back on *default* when the key is unset. The stored value, when
    present, must already be a list.
    """
    extra_args = params.get(param)
    if extra_args is None:
        return default
    assert isinstance(extra_args, list)
    return extra_args
2775 class ISO639Utils(object): 
2776     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
2965     def short2long(cls, code): 
2966         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
2967         return cls._lang_map.get(code[:2]) 
2970     def long2short(cls, code): 
2971         """Convert language code from ISO 639-2/T to ISO 639-1""" 
2972         for short_name, long_name in cls._lang_map.items(): 
2973             if long_name == code: 
2977 class ISO3166Utils(object): 
2978     # From http://data.okfn.org/data/core/country-list 
2980         'AF': 'Afghanistan', 
2981         'AX': 'Åland Islands', 
2984         'AS': 'American Samoa', 
2989         'AG': 'Antigua and Barbuda', 
3006         'BO': 'Bolivia, Plurinational State of', 
3007         'BQ': 'Bonaire, Sint Eustatius and Saba', 
3008         'BA': 'Bosnia and Herzegovina', 
3010         'BV': 'Bouvet Island', 
3012         'IO': 'British Indian Ocean Territory', 
3013         'BN': 'Brunei Darussalam', 
3015         'BF': 'Burkina Faso', 
3021         'KY': 'Cayman Islands', 
3022         'CF': 'Central African Republic', 
3026         'CX': 'Christmas Island', 
3027         'CC': 'Cocos (Keeling) Islands', 
3031         'CD': 'Congo, the Democratic Republic of the', 
3032         'CK': 'Cook Islands', 
3034         'CI': 'Côte d\'Ivoire', 
3039         'CZ': 'Czech Republic', 
3043         'DO': 'Dominican Republic', 
3046         'SV': 'El Salvador', 
3047         'GQ': 'Equatorial Guinea', 
3051         'FK': 'Falkland Islands (Malvinas)', 
3052         'FO': 'Faroe Islands', 
3056         'GF': 'French Guiana', 
3057         'PF': 'French Polynesia', 
3058         'TF': 'French Southern Territories', 
3073         'GW': 'Guinea-Bissau', 
3076         'HM': 'Heard Island and McDonald Islands', 
3077         'VA': 'Holy See (Vatican City State)', 
3084         'IR': 'Iran, Islamic Republic of', 
3087         'IM': 'Isle of Man', 
3097         'KP': 'Korea, Democratic People\'s Republic of', 
3098         'KR': 'Korea, Republic of', 
3101         'LA': 'Lao People\'s Democratic Republic', 
3107         'LI': 'Liechtenstein', 
3111         'MK': 'Macedonia, the Former Yugoslav Republic of', 
3118         'MH': 'Marshall Islands', 
3124         'FM': 'Micronesia, Federated States of', 
3125         'MD': 'Moldova, Republic of', 
3136         'NL': 'Netherlands', 
3137         'NC': 'New Caledonia', 
3138         'NZ': 'New Zealand', 
3143         'NF': 'Norfolk Island', 
3144         'MP': 'Northern Mariana Islands', 
3149         'PS': 'Palestine, State of', 
3151         'PG': 'Papua New Guinea', 
3154         'PH': 'Philippines', 
3158         'PR': 'Puerto Rico', 
3162         'RU': 'Russian Federation', 
3164         'BL': 'Saint Barthélemy', 
3165         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
3166         'KN': 'Saint Kitts and Nevis', 
3167         'LC': 'Saint Lucia', 
3168         'MF': 'Saint Martin (French part)', 
3169         'PM': 'Saint Pierre and Miquelon', 
3170         'VC': 'Saint Vincent and the Grenadines', 
3173         'ST': 'Sao Tome and Principe', 
3174         'SA': 'Saudi Arabia', 
3178         'SL': 'Sierra Leone', 
3180         'SX': 'Sint Maarten (Dutch part)', 
3183         'SB': 'Solomon Islands', 
3185         'ZA': 'South Africa', 
3186         'GS': 'South Georgia and the South Sandwich Islands', 
3187         'SS': 'South Sudan', 
3192         'SJ': 'Svalbard and Jan Mayen', 
3195         'CH': 'Switzerland', 
3196         'SY': 'Syrian Arab Republic', 
3197         'TW': 'Taiwan, Province of China', 
3199         'TZ': 'Tanzania, United Republic of', 
3201         'TL': 'Timor-Leste', 
3205         'TT': 'Trinidad and Tobago', 
3208         'TM': 'Turkmenistan', 
3209         'TC': 'Turks and Caicos Islands', 
3213         'AE': 'United Arab Emirates', 
3214         'GB': 'United Kingdom', 
3215         'US': 'United States', 
3216         'UM': 'United States Minor Outlying Islands', 
3220         'VE': 'Venezuela, Bolivarian Republic of', 
3222         'VG': 'Virgin Islands, British', 
3223         'VI': 'Virgin Islands, U.S.', 
3224         'WF': 'Wallis and Futuna', 
3225         'EH': 'Western Sahara', 
3232     def short2full(cls, code): 
3233         """Convert an ISO 3166-2 country code to the corresponding full name""" 
3234         return cls._country_map.get(code.upper()) 
3237 class GeoUtils(object): 
3238     # Major IPv4 address blocks per country 
3240         'AD': '85.94.160.0/19', 
3241         'AE': '94.200.0.0/13', 
3242         'AF': '149.54.0.0/17', 
3243         'AG': '209.59.64.0/18', 
3244         'AI': '204.14.248.0/21', 
3245         'AL': '46.99.0.0/16', 
3246         'AM': '46.70.0.0/15', 
3247         'AO': '105.168.0.0/13', 
3248         'AP': '159.117.192.0/21', 
3249         'AR': '181.0.0.0/12', 
3250         'AS': '202.70.112.0/20', 
3251         'AT': '84.112.0.0/13', 
3252         'AU': '1.128.0.0/11', 
3253         'AW': '181.41.0.0/18', 
3254         'AZ': '5.191.0.0/16', 
3255         'BA': '31.176.128.0/17', 
3256         'BB': '65.48.128.0/17', 
3257         'BD': '114.130.0.0/16', 
3259         'BF': '129.45.128.0/17', 
3260         'BG': '95.42.0.0/15', 
3261         'BH': '37.131.0.0/17', 
3262         'BI': '154.117.192.0/18', 
3263         'BJ': '137.255.0.0/16', 
3264         'BL': '192.131.134.0/24', 
3265         'BM': '196.12.64.0/18', 
3266         'BN': '156.31.0.0/16', 
3267         'BO': '161.56.0.0/16', 
3268         'BQ': '161.0.80.0/20', 
3269         'BR': '152.240.0.0/12', 
3270         'BS': '24.51.64.0/18', 
3271         'BT': '119.2.96.0/19', 
3272         'BW': '168.167.0.0/16', 
3273         'BY': '178.120.0.0/13', 
3274         'BZ': '179.42.192.0/18', 
3275         'CA': '99.224.0.0/11', 
3276         'CD': '41.243.0.0/16', 
3277         'CF': '196.32.200.0/21', 
3278         'CG': '197.214.128.0/17', 
3279         'CH': '85.0.0.0/13', 
3280         'CI': '154.232.0.0/14', 
3281         'CK': '202.65.32.0/19', 
3282         'CL': '152.172.0.0/14', 
3283         'CM': '165.210.0.0/15', 
3284         'CN': '36.128.0.0/10', 
3285         'CO': '181.240.0.0/12', 
3286         'CR': '201.192.0.0/12', 
3287         'CU': '152.206.0.0/15', 
3288         'CV': '165.90.96.0/19', 
3289         'CW': '190.88.128.0/17', 
3290         'CY': '46.198.0.0/15', 
3291         'CZ': '88.100.0.0/14', 
3293         'DJ': '197.241.0.0/17', 
3294         'DK': '87.48.0.0/12', 
3295         'DM': '192.243.48.0/20', 
3296         'DO': '152.166.0.0/15', 
3297         'DZ': '41.96.0.0/12', 
3298         'EC': '186.68.0.0/15', 
3299         'EE': '90.190.0.0/15', 
3300         'EG': '156.160.0.0/11', 
3301         'ER': '196.200.96.0/20', 
3302         'ES': '88.0.0.0/11', 
3303         'ET': '196.188.0.0/14', 
3304         'EU': '2.16.0.0/13', 
3305         'FI': '91.152.0.0/13', 
3306         'FJ': '144.120.0.0/16', 
3307         'FM': '119.252.112.0/20', 
3308         'FO': '88.85.32.0/19', 
3310         'GA': '41.158.0.0/15', 
3312         'GD': '74.122.88.0/21', 
3313         'GE': '31.146.0.0/16', 
3314         'GF': '161.22.64.0/18', 
3315         'GG': '62.68.160.0/19', 
3316         'GH': '45.208.0.0/14', 
3317         'GI': '85.115.128.0/19', 
3318         'GL': '88.83.0.0/19', 
3319         'GM': '160.182.0.0/15', 
3320         'GN': '197.149.192.0/18', 
3321         'GP': '104.250.0.0/19', 
3322         'GQ': '105.235.224.0/20', 
3323         'GR': '94.64.0.0/13', 
3324         'GT': '168.234.0.0/16', 
3325         'GU': '168.123.0.0/16', 
3326         'GW': '197.214.80.0/20', 
3327         'GY': '181.41.64.0/18', 
3328         'HK': '113.252.0.0/14', 
3329         'HN': '181.210.0.0/16', 
3330         'HR': '93.136.0.0/13', 
3331         'HT': '148.102.128.0/17', 
3332         'HU': '84.0.0.0/14', 
3333         'ID': '39.192.0.0/10', 
3334         'IE': '87.32.0.0/12', 
3335         'IL': '79.176.0.0/13', 
3336         'IM': '5.62.80.0/20', 
3337         'IN': '117.192.0.0/10', 
3338         'IO': '203.83.48.0/21', 
3339         'IQ': '37.236.0.0/14', 
3340         'IR': '2.176.0.0/12', 
3341         'IS': '82.221.0.0/16', 
3342         'IT': '79.0.0.0/10', 
3343         'JE': '87.244.64.0/18', 
3344         'JM': '72.27.0.0/17', 
3345         'JO': '176.29.0.0/16', 
3346         'JP': '126.0.0.0/8', 
3347         'KE': '105.48.0.0/12', 
3348         'KG': '158.181.128.0/17', 
3349         'KH': '36.37.128.0/17', 
3350         'KI': '103.25.140.0/22', 
3351         'KM': '197.255.224.0/20', 
3352         'KN': '198.32.32.0/19', 
3353         'KP': '175.45.176.0/22', 
3354         'KR': '175.192.0.0/10', 
3355         'KW': '37.36.0.0/14', 
3356         'KY': '64.96.0.0/15', 
3357         'KZ': '2.72.0.0/13', 
3358         'LA': '115.84.64.0/18', 
3359         'LB': '178.135.0.0/16', 
3360         'LC': '192.147.231.0/24', 
3361         'LI': '82.117.0.0/19', 
3362         'LK': '112.134.0.0/15', 
3363         'LR': '41.86.0.0/19', 
3364         'LS': '129.232.0.0/17', 
3365         'LT': '78.56.0.0/13', 
3366         'LU': '188.42.0.0/16', 
3367         'LV': '46.109.0.0/16', 
3368         'LY': '41.252.0.0/14', 
3369         'MA': '105.128.0.0/11', 
3370         'MC': '88.209.64.0/18', 
3371         'MD': '37.246.0.0/16', 
3372         'ME': '178.175.0.0/17', 
3373         'MF': '74.112.232.0/21', 
3374         'MG': '154.126.0.0/17', 
3375         'MH': '117.103.88.0/21', 
3376         'MK': '77.28.0.0/15', 
3377         'ML': '154.118.128.0/18', 
3378         'MM': '37.111.0.0/17', 
3379         'MN': '49.0.128.0/17', 
3380         'MO': '60.246.0.0/16', 
3381         'MP': '202.88.64.0/20', 
3382         'MQ': '109.203.224.0/19', 
3383         'MR': '41.188.64.0/18', 
3384         'MS': '208.90.112.0/22', 
3385         'MT': '46.11.0.0/16', 
3386         'MU': '105.16.0.0/12', 
3387         'MV': '27.114.128.0/18', 
3388         'MW': '105.234.0.0/16', 
3389         'MX': '187.192.0.0/11', 
3390         'MY': '175.136.0.0/13', 
3391         'MZ': '197.218.0.0/15', 
3392         'NA': '41.182.0.0/16', 
3393         'NC': '101.101.0.0/18', 
3394         'NE': '197.214.0.0/18', 
3395         'NF': '203.17.240.0/22', 
3396         'NG': '105.112.0.0/12', 
3397         'NI': '186.76.0.0/15', 
3398         'NL': '145.96.0.0/11', 
3399         'NO': '84.208.0.0/13', 
3400         'NP': '36.252.0.0/15', 
3401         'NR': '203.98.224.0/19', 
3402         'NU': '49.156.48.0/22', 
3403         'NZ': '49.224.0.0/14', 
3404         'OM': '5.36.0.0/15', 
3405         'PA': '186.72.0.0/15', 
3406         'PE': '186.160.0.0/14', 
3407         'PF': '123.50.64.0/18', 
3408         'PG': '124.240.192.0/19', 
3409         'PH': '49.144.0.0/13', 
3410         'PK': '39.32.0.0/11', 
3411         'PL': '83.0.0.0/11', 
3412         'PM': '70.36.0.0/20', 
3413         'PR': '66.50.0.0/16', 
3414         'PS': '188.161.0.0/16', 
3415         'PT': '85.240.0.0/13', 
3416         'PW': '202.124.224.0/20', 
3417         'PY': '181.120.0.0/14', 
3418         'QA': '37.210.0.0/15', 
3419         'RE': '139.26.0.0/16', 
3420         'RO': '79.112.0.0/13', 
3421         'RS': '178.220.0.0/14', 
3422         'RU': '5.136.0.0/13', 
3423         'RW': '105.178.0.0/15', 
3424         'SA': '188.48.0.0/13', 
3425         'SB': '202.1.160.0/19', 
3426         'SC': '154.192.0.0/11', 
3427         'SD': '154.96.0.0/13', 
3428         'SE': '78.64.0.0/12', 
3429         'SG': '152.56.0.0/14', 
3430         'SI': '188.196.0.0/14', 
3431         'SK': '78.98.0.0/15', 
3432         'SL': '197.215.0.0/17', 
3433         'SM': '89.186.32.0/19', 
3434         'SN': '41.82.0.0/15', 
3435         'SO': '197.220.64.0/19', 
3436         'SR': '186.179.128.0/17', 
3437         'SS': '105.235.208.0/21', 
3438         'ST': '197.159.160.0/19', 
3439         'SV': '168.243.0.0/16', 
3440         'SX': '190.102.0.0/20', 
3442         'SZ': '41.84.224.0/19', 
3443         'TC': '65.255.48.0/20', 
3444         'TD': '154.68.128.0/19', 
3445         'TG': '196.168.0.0/14', 
3446         'TH': '171.96.0.0/13', 
3447         'TJ': '85.9.128.0/18', 
3448         'TK': '27.96.24.0/21', 
3449         'TL': '180.189.160.0/20', 
3450         'TM': '95.85.96.0/19', 
3451         'TN': '197.0.0.0/11', 
3452         'TO': '175.176.144.0/21', 
3453         'TR': '78.160.0.0/11', 
3454         'TT': '186.44.0.0/15', 
3455         'TV': '202.2.96.0/19', 
3456         'TW': '120.96.0.0/11', 
3457         'TZ': '156.156.0.0/14', 
3458         'UA': '93.72.0.0/13', 
3459         'UG': '154.224.0.0/13', 
3461         'UY': '167.56.0.0/13', 
3462         'UZ': '82.215.64.0/18', 
3463         'VA': '212.77.0.0/19', 
3464         'VC': '24.92.144.0/20', 
3465         'VE': '186.88.0.0/13', 
3466         'VG': '172.103.64.0/18', 
3467         'VI': '146.226.0.0/16', 
3468         'VN': '14.160.0.0/11', 
3469         'VU': '202.80.32.0/20', 
3470         'WF': '117.20.32.0/21', 
3471         'WS': '202.4.32.0/19', 
3472         'YE': '134.35.0.0/16', 
3473         'YT': '41.242.116.0/22', 
3474         'ZA': '41.0.0.0/11', 
3475         'ZM': '165.56.0.0/13', 
3476         'ZW': '41.85.192.0/19', 
3480     def random_ipv4(cls, code): 
3481         block = cls._country_ip_map.get(code.upper()) 
3484         addr, preflen = block.split('/') 
3485         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] 
3486         addr_max = addr_min | (0xffffffff >> int(preflen)) 
3487         return compat_str(socket.inet_ntoa( 
3488             compat_struct_pack('!L', random.randint(addr_min, addr_max)))) 
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header
    and hands SOCKS proxies off to the http/https handlers."""

    def __init__(self, proxies=None):
        # Set default handlers: every http/https request is routed through
        # proxy_open, defaulting to "no proxy" unless overridden.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            # A per-request override wins over the configured proxy.
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
3516 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 
3517 # released into Public Domain 
3518 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    out = b''
    n = int(n)
    # Emit 32 bits at a time, most-significant chunk first.
    while n > 0:
        out = compat_struct_pack('>I', n & 0xffffffff) + out
        n = n >> 32
    # Strip leading zero bytes; n == 0 must still yield a single zero byte.
    out = out.lstrip(b'\000')
    if not out:
        out = b'\000'
    # Pad the front back up to a multiple of blocksize.
    if blocksize > 0 and len(out) % blocksize:
        out = (blocksize - len(out) % blocksize) * b'\000' + out
    return out
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    # Left-pad with zero bytes to a multiple of 4 so the input can be
    # consumed as big-endian 32-bit words.
    if length % 4:
        padding = 4 - length % 4
        s = b'\000' * padding + s
        length += padding
    for offset in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The reversed bytes are read as one big-endian hex integer, i.e. the
    # original data is interpreted little-endian.
    payload = int(binascii.hexlify(data[::-1]), 16)
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data

    Raises ValueError when the data cannot fit (PKCS#1 v1.5 requires at
    least 11 bytes of overhead: 0x00 0x02, >= 8 padding bytes, 0x00).
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS#1 v1.5, EME-PKCS1-v1_5) requires the PS padding octets
    # to be NONZERO; a zero byte here would prematurely terminate the
    # padding on decode, so draw from [1, 254].
    pseudo_random = [random.randint(1, 254) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Render the non-negative integer ``num`` in base ``n``.

    ``table`` supplies the digit characters; when omitted, a default
    0-9/a-z/A-Z alphabet truncated to ``n`` symbols is used.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse.
    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Reverse Dean Edwards' JavaScript "packer" obfuscation.

    Rebuilds the symbol table from the packed payload and substitutes
    every base-n token in the obfuscated source with its original symbol.
    """
    packed = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = packed.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each base-n token to its replacement; an empty symbol means
    # the token stands for itself.
    symbol_table = {}
    for index in range(count - 1, -1, -1):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda match: symbol_table[match.group(0)],
        obfuscated_code)
def parse_m3u8_attributes(attrib):
    """Parse an HLS attribute list (e.g. 'BANDWIDTH=1280000,CODECS="..."')
    into a dict, stripping the surrounding quotes from quoted values."""
    info = {}
    for key, val in re.findall(
            r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift: emulates JavaScript's ``>>>`` operator.

    Negative values are first mapped to their unsigned 32-bit
    representation before shifting.
    """
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into ``(width, height, pixels)``.

    ``pixels`` is a list of rows, each row a flat list of color-channel
    bytes (the code assumes 3 bytes per pixel: ``stride = width * 3``).
    Raises IOError for invalid signatures or missing image data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the 8-byte PNG signature and require IHDR as the first chunk.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: length (4) | type (4) | data (length) | CRC (4).
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed (checked above) to be the first chunk.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across multiple IDAT chunks; concatenate
    # them before inflating.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 channel bytes per pixel (assumes 8-bit RGB without alpha
    # — TODO confirm against callers).
    stride = width * 3
    pixels = []

    # Fetch an already-reconstructed channel byte by flat index.
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed by one filter-type byte.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbors used by the reconstruction filters; out-of-bounds
            # neighbors are treated as 0 per the PNG spec.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                # Predict from left (a), up (b) and upper-left (c),
                # choosing whichever neighbor is closest to a + b - c.
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute ``key`` to ``value`` (bytes) on file ``path``.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate
    Data Streams on Windows, then the setfattr/xattr command-line tools.
    Raises XAttrMetadataError when a write fails and XAttrUnavailableError
    when no usable backend exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
3839 def random_birthday(year_field
, month_field
, day_field
): 
3841         year_field
: str(random
.randint(1950, 1995)), 
3842         month_field
: str(random
.randint(1, 12)), 
3843         day_field
: str(random
.randint(1, 31)),