4 from __future__ 
import unicode_literals
 
  35 import xml
.etree
.ElementTree
 
  42     compat_etree_fromstring
, 
  45     compat_html_entities_html5
, 
  51     compat_socket_create_connection
, 
  57     compat_urllib_parse_urlencode
, 
  58     compat_urllib_parse_urlparse
, 
  59     compat_urllib_parse_unquote_plus
, 
  60     compat_urllib_request
, 
  71 def register_socks_protocols(): 
  72     # "Register" SOCKS protocols 
  73     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  74     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  75     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  76         if scheme 
not in compat_urlparse
.uses_netloc
: 
  77             compat_urlparse
.uses_netloc
.append(scheme
) 
# This is not clearly defined otherwise
# (the compiled-pattern type has no stable public name across the Python
# versions this file supports, so derive it from a trivial compile)
compiled_regex_type = type(re.compile(''))
  84     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 
  85     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  86     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  87     'Accept-Encoding': 'gzip, deflate', 
  88     'Accept-Language': 'en-us,en;q=0.5', 
  93     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
# Full English month names in calendar order (index 0 = January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
 104     'en': ENGLISH_MONTH_NAMES
, 
 106         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
 107         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 111     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 112     'flv', 'f4v', 'f4a', 'f4b', 
 113     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 114     'mkv', 'mka', 'mk3d', 
 123     'f4f', 'f4m', 'm3u8', 'smil') 
# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration.  Single-char
# replacements are given as plain strings (iterated char by char); the
# multi-char replacements ('AE', 'OE', 'ss', ...) are wrapped in one-element
# lists so that zip() pairs each with exactly one source character.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
 151     '%Y-%m-%d %H:%M:%S.%f', 
 154     '%Y-%m-%dT%H:%M:%SZ', 
 155     '%Y-%m-%dT%H:%M:%S.%fZ', 
 156     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 158     '%Y-%m-%dT%H:%M:%S.%f', 
 161     '%b %d %Y at %H:%M:%S', 
 164 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 165 DATE_FORMATS_DAY_FIRST
.extend([ 
 174 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 175 DATE_FORMATS_MONTH_FIRST
.extend([ 
# Matches the trailing argument list of "packed" (eval-obfuscated) JavaScript:
# groups are the payload, radix, symbol count, and the '|'-joined symbol table.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 186 def preferredencoding(): 
 187     """Get preferred encoding. 
 189     Returns the best encoding scheme for the system, based on 
 190     locale.getpreferredencoding() and some further tweaks. 
 193         pref 
= locale
.getpreferredencoding() 
 201 def write_json_file(obj
, fn
): 
 202     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 204     fn 
= encodeFilename(fn
) 
 205     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 206         encoding 
= get_filesystem_encoding() 
 207         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 208         # will fail if the filename contains non ascii characters unless we 
 209         # use a unicode object 
 210         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 211         # the same for os.path.dirname 
 212         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 214         path_basename 
= os
.path
.basename
 
 215         path_dirname 
= os
.path
.dirname
 
 219         'prefix': path_basename(fn
) + '.', 
 220         'dir': path_dirname(fn
), 
 224     # In Python 2.x, json.dump expects a bytestream. 
 225     # In Python 3.x, it writes to a character stream 
 226     if sys
.version_info 
< (3, 0): 
 234     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 239         if sys
.platform 
== 'win32': 
 240             # Need to remove existing file on Windows, else os.rename raises 
 241             # WindowsError or FileExistsError. 
 246         os
.rename(tf
.name
, fn
) 
 255 if sys
.version_info 
>= (2, 7): 
 256     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 257         """ Find the xpath xpath[@key=val] """ 
 258         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 259         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 260         return node
.find(expr
) 
 262     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 263         for f 
in node
.findall(compat_xpath(xpath
)): 
 264             if key 
not in f
.attrib
: 
 266             if val 
is None or f
.attrib
.get(key
) == val
: 
 270 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 271 # the namespace parameter 
 274 def xpath_with_ns(path
, ns_map
): 
 275     components 
= [c
.split(':') for c 
in path
.split('/')] 
 279             replaced
.append(c
[0]) 
 282             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 283     return '/'.join(replaced
) 
 286 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 287     def _find_xpath(xpath
): 
 288         return node
.find(compat_xpath(xpath
)) 
 290     if isinstance(xpath
, (str, compat_str
)): 
 291         n 
= _find_xpath(xpath
) 
 299         if default 
is not NO_DEFAULT
: 
 302             name 
= xpath 
if name 
is None else name
 
 303             raise ExtractorError('Could not find XML element %s' % name
) 
 309 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 310     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 311     if n 
is None or n 
== default
: 
 314         if default 
is not NO_DEFAULT
: 
 317             name 
= xpath 
if name 
is None else name
 
 318             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 324 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 325     n 
= find_xpath_attr(node
, xpath
, key
) 
 327         if default 
is not NO_DEFAULT
: 
 330             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 331             raise ExtractorError('Could not find XML attribute %s' % name
) 
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # NOTE: parameter name `id` shadows the builtin; kept for backward
    # compatibility with existing keyword callers.
    return get_element_by_attribute('id', id, html)
 342 def get_element_by_class(class_name
, html
): 
 343     """Return the content of the first tag with the specified class in the passed HTML document""" 
 344     retval 
= get_elements_by_class(class_name
, html
) 
 345     return retval
[0] if retval 
else None 
 348 def get_element_by_attribute(attribute
, value
, html
, escape_value
=True): 
 349     retval 
= get_elements_by_attribute(attribute
, value
, html
, escape_value
) 
 350     return retval
[0] if retval 
else None 
 353 def get_elements_by_class(class_name
, html
): 
 354     """Return the content of all tags with the specified class in the passed HTML document as a list""" 
 355     return get_elements_by_attribute( 
 356         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 357         html, escape_value=False) 
 360 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
 361     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 363     value = re.escape(value) if escape_value else value 
 366     for m in re.finditer(r'''(?xs) 
 368          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'))*?
 
 370          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'))*? 
 374     ''' % (re.escape(attribute), value), html): 
 375         res = m.group('content
') 
 377         if res.startswith('"') or res.startswith("'"): 
 380         retlist.append(unescapeHTML(res)) 
 385 class HTMLAttributeParser(compat_HTMLParser): 
 386     """Trivial HTML parser to gather the attributes for a single element""" 
 389         compat_HTMLParser.__init__(self) 
 391     def handle_starttag(self, tag, attrs): 
 392         self.attrs = dict(attrs) 
 395 def extract_attributes(html_element): 
 396     """Given a string for an HTML element such as 
 398          a="foo" B="bar" c="&98;az" d=boz 
 399          empty= noval entity="&" 
 402     Decode and return a dictionary of attributes. 
 404         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 405         'empty
': '', 'noval
': None, 'entity
': '&', 
 406         'sq
': '"', 'dq': '\'' 
 408     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 409     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 411     parser = HTMLAttributeParser() 
 412     parser.feed(html_element) 
 417 def clean_html(html): 
 418     """Clean an HTML snippet into a readable string""" 
 420     if html is None:  # Convenience for sanitizing descriptions etc. 
 424     html = html.replace('\n', ' ') 
 425     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 426     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 428     html = re.sub('<.*?>', '', html) 
 429     # Replace html entities 
 430     html = unescapeHTML(html) 
 434 def sanitize_open(filename, open_mode): 
 435     """Try to open the given filename, and slightly tweak it if this fails. 
 437     Attempts to open the given filename. If this fails, it tries to change 
 438     the filename slightly, step by step, until it's either able to open it 
 439     or it fails and raises a final exception, like the standard open() 
 442     It returns the tuple (stream, definitive_file_name). 
 446             if sys.platform == 'win32': 
 448                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 449             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 450         stream = open(encodeFilename(filename), open_mode) 
 451         return (stream, filename) 
 452     except (IOError, OSError) as err: 
 453         if err.errno in (errno.EACCES,): 
 456         # In case of error, try to remove win32 forbidden chars 
 457         alt_filename = sanitize_path(filename) 
 458         if alt_filename == filename: 
 461             # An exception here should be caught in the caller 
 462             stream = open(encodeFilename(alt_filename), open_mode) 
 463             return (stream, alt_filename) 
 466 def timeconvert(timestr): 
 467     """Convert RFC 2822 defined time string into system timestamp""" 
 469     timetuple = email.utils.parsedate_tz(timestr) 
 470     if timetuple is not None: 
 471         timestamp = email.utils.mktime_tz(timetuple) 
 475 def sanitize_filename(s, restricted=False, is_id=False): 
 476     """Sanitizes a string so it could be used as part of a filename. 
 477     If restricted is set, use a stricter subset of allowed characters. 
 478     Set is_id if this is not an arbitrary string, but an ID that should be kept 
 481     def replace_insane(char): 
 482         if restricted and char in ACCENT_CHARS: 
 483             return ACCENT_CHARS[char] 
 484         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 487             return '' if restricted else '\'' 
 489             return '_
-' if restricted else ' -' 
 490         elif char in '\\/|
*<>': 
 492         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 494         if restricted 
and ord(char
) > 127: 
 499     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 500     result 
= ''.join(map(replace_insane
, s
)) 
 502         while '__' in result
: 
 503             result 
= result
.replace('__', '_') 
 504         result 
= result
.strip('_') 
 505         # Common case of "Foreign band name - English song title" 
 506         if restricted 
and result
.startswith('-_'): 
 508         if result
.startswith('-'): 
 509             result 
= '_' + result
[len('-'):] 
 510         result 
= result
.lstrip('.') 
 516 def sanitize_path(s
): 
 517     """Sanitizes and normalizes path on Windows""" 
 518     if sys
.platform 
!= 'win32': 
 520     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 521     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 522         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 523     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 527         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
 528         for path_part 
in norm_path
] 
 530         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 531     return os
.path
.join(*sanitized_path
) 
 534 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of 
 535 # unwanted failures due to missing protocol 
 536 def sanitize_url(url
): 
 537     return 'http:%s' % url 
if url
.startswith('//') else url
 
 540 def sanitized_Request(url
, *args
, **kwargs
): 
 541     return compat_urllib_request
.Request(sanitize_url(url
), *args
, **kwargs
) 
 545     """Expand shell variables and ~""" 
 546     return os
.path
.expandvars(compat_expanduser(s
)) 
 549 def orderedSet(iterable
): 
 550     """ Remove all duplicates from the input iterable """ 
 558 def _htmlentity_transform(entity_with_semicolon
): 
 559     """Transforms an HTML entity to a character.""" 
 560     entity 
= entity_with_semicolon
[:-1] 
 562     # Known non-numeric HTML entity 
 563     if entity 
in compat_html_entities
.name2codepoint
: 
 564         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 566     # TODO: HTML5 allows entities without a semicolon. For example, 
 567     # 'Éric' should be decoded as 'Éric'. 
 568     if entity_with_semicolon 
in compat_html_entities_html5
: 
 569         return compat_html_entities_html5
[entity_with_semicolon
] 
 571     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 573         numstr 
= mobj
.group(1) 
 574         if numstr
.startswith('x'): 
 576             numstr 
= '0%s' % numstr
 
 579         # See https://github.com/rg3/youtube-dl/issues/7518 
 581             return compat_chr(int(numstr
, base
)) 
 585     # Unknown entity in name, return its literal representation 
 586     return '&%s;' % entity
 
 592     assert type(s
) == compat_str
 
 595         r
'&([^;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 598 def get_subprocess_encoding(): 
 599     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 600         # For subprocess calls, encode with locale encoding 
 601         # Refer to http://stackoverflow.com/a/9951851/35070 
 602         encoding 
= preferredencoding() 
 604         encoding 
= sys
.getfilesystemencoding() 
 610 def encodeFilename(s
, for_subprocess
=False): 
 612     @param s The name of the file 
 615     assert type(s
) == compat_str
 
 617     # Python 3 has a Unicode API 
 618     if sys
.version_info 
>= (3, 0): 
 621     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 622     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 623     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 624     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 627     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 628     if sys
.platform
.startswith('java'): 
 631     return s
.encode(get_subprocess_encoding(), 'ignore') 
 634 def decodeFilename(b
, for_subprocess
=False): 
 636     if sys
.version_info 
>= (3, 0): 
 639     if not isinstance(b
, bytes): 
 642     return b
.decode(get_subprocess_encoding(), 'ignore') 
 645 def encodeArgument(s
): 
 646     if not isinstance(s
, compat_str
): 
 647         # Legacy code that uses byte strings 
 648         # Uncomment the following line after fixing all post processors 
 649         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 650         s 
= s
.decode('ascii') 
 651     return encodeFilename(s
, True) 
def decodeArgument(b):
    """Decode a command-line argument via decodeFilename in subprocess mode
    (for_subprocess=True)."""
    return decodeFilename(b, True)
 658 def decodeOption(optval
): 
 661     if isinstance(optval
, bytes): 
 662         optval 
= optval
.decode(preferredencoding()) 
 664     assert isinstance(optval
, compat_str
) 
 668 def formatSeconds(secs
): 
 670         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 672         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 677 def make_HTTPS_handler(params
, **kwargs
): 
 678     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 679     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 680         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 681         if opts_no_check_certificate
: 
 682             context
.check_hostname 
= False 
 683             context
.verify_mode 
= ssl
.CERT_NONE
 
 685             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 688             # (create_default_context present but HTTPSHandler has no context=) 
 691     if sys
.version_info 
< (3, 2): 
 692         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 694         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 695         context
.verify_mode 
= (ssl
.CERT_NONE
 
 696                                if opts_no_check_certificate
 
 697                                else ssl
.CERT_REQUIRED
) 
 698         context
.set_default_verify_paths() 
 699         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 702 def bug_reports_message(): 
 703     if ytdl_is_updateable(): 
 704         update_cmd 
= 'type  youtube-dl -U  to update' 
 706         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 707     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 708     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 709     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
# Root of the youtube-dl exception hierarchy; the error classes below all
# derive from it so callers can catch every youtube-dl error at once.
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
 718 class ExtractorError(YoutubeDLError
): 
 719     """Error during info extraction.""" 
 721     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 722         """ tb, if given, is the original traceback (so that it can be printed out). 
 723         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 726         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 728         if video_id 
is not None: 
 729             msg 
= video_id 
+ ': ' + msg
 
 731             msg 
+= ' (caused by %r)' % cause
 
 733             msg 
+= bug_reports_message() 
 734         super(ExtractorError
, self
).__init
__(msg
) 
 737         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 739         self
.video_id 
= video_id
 
 741     def format_traceback(self
): 
 742         if self
.traceback 
is None: 
 744         return ''.join(traceback
.format_tb(self
.traceback
)) 
class UnsupportedError(ExtractorError):
    """Raised for URLs that no extractor can handle."""

    def __init__(self, url):
        # expected=True: per ExtractorError, this marks a normal error
        # message rather than a likely bug in youtube-dl.
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # expected=True: geo restriction is a normal condition, not a bug.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # Countries where the video is available, when known
        # (presumably country codes — TODO confirm format with callers).
        self.countries = countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Preserved so callers can inspect/re-raise the root cause
        self.exc_info = exc_info
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
 820 class ContentTooShortError(YoutubeDLError
): 
 821     """Content Too Short exception. 
 823     This exception may be raised by FileDownloader objects when a file they 
 824     download is too small for what the server announced first, indicating 
 825     the connection was probably interrupted. 
 828     def __init__(self
, downloaded
, expected
): 
 829         super(ContentTooShortError
, self
).__init
__( 
 830             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
) 
 833         self
.downloaded 
= downloaded
 
 834         self
.expected 
= expected
 
 837 class XAttrMetadataError(YoutubeDLError
): 
 838     def __init__(self
, code
=None, msg
='Unknown error'): 
 839         super(XAttrMetadataError
, self
).__init
__(msg
) 
 843         # Parsing code and msg 
 844         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 845                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 846             self
.reason 
= 'NO_SPACE' 
 847         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 848             self
.reason 
= 'VALUE_TOO_LONG' 
 850             self
.reason 
= 'NOT_SUPPORTED' 
class XAttrUnavailableError(YoutubeDLError):
    """Error for unavailable extended-attribute (xattr) support —
    NOTE(review): confirm exact semantics against the code that raises it."""
 857 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 858     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 859     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 860     # https://github.com/rg3/youtube-dl/issues/6727) 
 861     if sys
.version_info 
< (3, 0): 
 862         kwargs
[b
'strict'] = True 
 863     hc 
= http_class(*args
, **kwargs
) 
 864     source_address 
= ydl_handler
._params
.get('source_address') 
 865     if source_address 
is not None: 
 866         sa 
= (source_address
, 0) 
 867         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 868             hc
.source_address 
= sa
 
 870             def _hc_connect(self
, *args
, **kwargs
): 
 871                 sock 
= compat_socket_create_connection( 
 872                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 874                     self
.sock 
= ssl
.wrap_socket( 
 875                         sock
, self
.key_file
, self
.cert_file
, 
 876                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 879             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 884 def handle_youtubedl_headers(headers
): 
 885     filtered_headers 
= headers
 
 887     if 'Youtubedl-no-compression' in filtered_headers
: 
 888         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 889         del filtered_headers
['Youtubedl-no-compression'] 
 891     return filtered_headers
 
 894 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 895     """Handler for HTTP requests and responses. 
 897     This class, when installed with an OpenerDirector, automatically adds 
 898     the standard headers to every HTTP request and handles gzipped and 
 899     deflated responses from web servers. If compression is to be avoided in 
 900     a particular request, the original request in the program code only has 
 901     to include the HTTP header "Youtubedl-no-compression", which will be 
 902     removed before making the real request. 
 904     Part of this code was copied from: 
 906     http://techknack.net/python-urllib2-handlers/ 
 908     Andrew Rowls, the author of that code, agreed to release it to the 
 912     def __init__(self
, params
, *args
, **kwargs
): 
 913         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 914         self
._params 
= params
 
 916     def http_open(self
, req
): 
 917         conn_class 
= compat_http_client
.HTTPConnection
 
 919         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 921             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 922             del req
.headers
['Ytdl-socks-proxy'] 
 924         return self
.do_open(functools
.partial( 
 925             _create_http_connection
, self
, conn_class
, False), 
 931             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 933             return zlib
.decompress(data
) 
 936     def addinfourl_wrapper(stream
, headers
, url
, code
): 
 937         if hasattr(compat_urllib_request
.addinfourl
, 'getcode'): 
 938             return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
) 
 939         ret 
= compat_urllib_request
.addinfourl(stream
, headers
, url
) 
 943     def http_request(self
, req
): 
 944         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 945         # always respected by websites, some tend to give out URLs with non percent-encoded 
 946         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 947         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 948         # To work around aforementioned issue we will replace request's original URL with 
 949         # percent-encoded one 
 950         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 951         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 952         url 
= req
.get_full_url() 
 953         url_escaped 
= escape_url(url
) 
 955         # Substitute URL if any change after escaping 
 956         if url 
!= url_escaped
: 
 957             req 
= update_Request(req
, url
=url_escaped
) 
 959         for h
, v 
in std_headers
.items(): 
 960             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 961             # The dict keys are capitalized because of this bug by urllib 
 962             if h
.capitalize() not in req
.headers
: 
 965         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 967         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 968             # Python 2.6 is brain-dead when it comes to fragments 
 969             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 970             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 974     def http_response(self
, req
, resp
): 
 977         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 978             content 
= resp
.read() 
 979             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 981                 uncompressed 
= io
.BytesIO(gz
.read()) 
 982             except IOError as original_ioerror
: 
 983                 # There may be junk add the end of the file 
 984                 # See http://stackoverflow.com/q/4928560/35070 for details 
 985                 for i 
in range(1, 1024): 
 987                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 988                         uncompressed 
= io
.BytesIO(gz
.read()) 
 993                     raise original_ioerror
 
 994             resp 
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 995             resp
.msg 
= old_resp
.msg
 
 996             del resp
.headers
['Content-encoding'] 
 998         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 999             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
1000             resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
1001             resp
.msg 
= old_resp
.msg
 
1002             del resp
.headers
['Content-encoding'] 
1003         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
1004         # https://github.com/rg3/youtube-dl/issues/6457). 
1005         if 300 <= resp
.code 
< 400: 
1006             location 
= resp
.headers
.get('Location') 
1008                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
1009                 if sys
.version_info 
>= (3, 0): 
1010                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
1012                     location 
= location
.decode('utf-8') 
1013                 location_escaped 
= escape_url(location
) 
1014                 if location 
!= location_escaped
: 
1015                     del resp
.headers
['Location'] 
1016                     if sys
.version_info 
< (3, 0): 
1017                         location_escaped 
= location_escaped
.encode('utf-8') 
1018                     resp
.headers
['Location'] = location_escaped
 
1021     https_request 
= http_request
 
1022     https_response 
= http_response
 
1025 def make_socks_conn_class(base_class
, socks_proxy
): 
1026     assert issubclass(base_class
, ( 
1027         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
1029     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
1030     if url_components
.scheme
.lower() == 'socks5': 
1031         socks_type 
= ProxyType
.SOCKS5
 
1032     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
1033         socks_type 
= ProxyType
.SOCKS4
 
1034     elif url_components
.scheme
.lower() == 'socks4a': 
1035         socks_type 
= ProxyType
.SOCKS4A
 
1037     def unquote_if_non_empty(s
): 
1040         return compat_urllib_parse_unquote_plus(s
) 
1044         url_components
.hostname
, url_components
.port 
or 1080, 
1046         unquote_if_non_empty(url_components
.username
), 
1047         unquote_if_non_empty(url_components
.password
), 
1050     class SocksConnection(base_class
): 
1052             self
.sock 
= sockssocket() 
1053             self
.sock
.setproxy(*proxy_args
) 
1054             if type(self
.timeout
) in (int, float): 
1055                 self
.sock
.settimeout(self
.timeout
) 
1056             self
.sock
.connect((self
.host
, self
.port
)) 
1058             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1059                 if hasattr(self
, '_context'):  # Python > 2.6 
1060                     self
.sock 
= self
._context
.wrap_socket( 
1061                         self
.sock
, server_hostname
=self
.host
) 
1063                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1065     return SocksConnection
 
1068 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
1069     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
1070         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
1071         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
1072         self
._params 
= params
 
1074     def https_open(self
, req
): 
1076         conn_class 
= self
._https
_conn
_class
 
1078         if hasattr(self
, '_context'):  # python > 2.6 
1079             kwargs
['context'] = self
._context
 
1080         if hasattr(self
, '_check_hostname'):  # python 3.x 
1081             kwargs
['check_hostname'] = self
._check
_hostname
 
1083         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
1085             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1086             del req
.headers
['Ytdl-socks-proxy'] 
1088         return self
.do_open(functools
.partial( 
1089             _create_http_connection
, self
, conn_class
, True), 
1093 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
1094     def __init__(self
, cookiejar
=None): 
1095         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
1097     def http_response(self
, request
, response
): 
1098         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
1099         # characters in Set-Cookie HTTP header of last response (see 
1100         # https://github.com/rg3/youtube-dl/issues/6769). 
1101         # In order to at least prevent crashing we will percent encode Set-Cookie 
1102         # header before HTTPCookieProcessor starts processing it. 
1103         # if sys.version_info < (3, 0) and response.headers: 
1104         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
1105         #         set_cookie = response.headers.get(set_cookie_header) 
1107         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
1108         #             if set_cookie != set_cookie_escaped: 
1109         #                 del response.headers[set_cookie_header] 
1110         #                 response.headers[set_cookie_header] = set_cookie_escaped 
1111         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
1113     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
1114     https_response 
= http_response
 
1117 def extract_timezone(date_str
): 
1119         r
'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', 
1122         timezone 
= datetime
.timedelta() 
1124         date_str 
= date_str
[:-len(m
.group('tz'))] 
1125         if not m
.group('sign'): 
1126             timezone 
= datetime
.timedelta() 
1128             sign 
= 1 if m
.group('sign') == '+' else -1 
1129             timezone 
= datetime
.timedelta( 
1130                 hours
=sign 
* int(m
.group('hours')), 
1131                 minutes
=sign 
* int(m
.group('minutes'))) 
1132     return timezone
, date_str
 
1135 def parse_iso8601(date_str
, delimiter
='T', timezone
=None): 
1136     """ Return a UNIX timestamp from the given date """ 
1138     if date_str 
is None: 
1141     date_str 
= re
.sub(r
'\.[0-9]+', '', date_str
) 
1143     if timezone 
is None: 
1144         timezone
, date_str 
= extract_timezone(date_str
) 
1147         date_format 
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
) 
1148         dt 
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
 
1149         return calendar
.timegm(dt
.timetuple()) 
def date_formats(day_first=True):
    """Pick the strptime format table matching day-first vs month-first order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1158 def unified_strdate(date_str
, day_first
=True): 
1159     """Return a string with the date in the format YYYYMMDD""" 
1161     if date_str 
is None: 
1165     date_str 
= date_str
.replace(',', ' ') 
1166     # Remove AM/PM + timezone 
1167     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1168     _
, date_str 
= extract_timezone(date_str
) 
1170     for expression 
in date_formats(day_first
): 
1172             upload_date 
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d') 
1175     if upload_date 
is None: 
1176         timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1179                 upload_date 
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d') 
1182     if upload_date 
is not None: 
1183         return compat_str(upload_date
) 
1186 def unified_timestamp(date_str
, day_first
=True): 
1187     if date_str 
is None: 
1190     date_str 
= date_str
.replace(',', ' ') 
1192     pm_delta 
= 12 if re
.search(r
'(?i)PM', date_str
) else 0 
1193     timezone
, date_str 
= extract_timezone(date_str
) 
1195     # Remove AM/PM + timezone 
1196     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1198     # Remove unrecognized timezones from ISO 8601 alike timestamps 
1199     m 
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
) 
1201         date_str 
= date_str
[:-len(m
.group('tz'))] 
1203     for expression 
in date_formats(day_first
): 
1205             dt 
= datetime
.datetime
.strptime(date_str
, expression
) - timezone 
+ datetime
.timedelta(hours
=pm_delta
) 
1206             return calendar
.timegm(dt
.timetuple()) 
1209     timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1211         return calendar
.timegm(timetuple
) + pm_delta 
* 3600 
1214 def determine_ext(url
, default_ext
='unknown_video'): 
1217     guess 
= url
.partition('?')[0].rpartition('.')[2] 
1218     if re
.match(r
'^[A-Za-z0-9]+$', guess
): 
1220     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download 
1221     elif guess
.rstrip('/') in KNOWN_EXTENSIONS
: 
1222         return guess
.rstrip('/') 
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name of the form <base>.<lang>.<format>.

    Everything after the last '.' in *filename* is treated as the
    media extension and replaced.
    """
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
1231 def date_from_str(date_str
): 
1233     Return a datetime object from a string in the format YYYYMMDD or 
1234     (now|today)[+-][0-9](day|week|month|year)(s)?""" 
1235     today 
= datetime
.date
.today() 
1236     if date_str 
in ('now', 'today'): 
1238     if date_str 
== 'yesterday': 
1239         return today 
- datetime
.timedelta(days
=1) 
1240     match 
= re
.match(r
'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
) 
1241     if match 
is not None: 
1242         sign 
= match
.group('sign') 
1243         time 
= int(match
.group('time')) 
1246         unit 
= match
.group('unit') 
1247         # A bad approximation? 
1251         elif unit 
== 'year': 
1255         delta 
= datetime
.timedelta(**{unit
: time
}) 
1256         return today 
+ delta
 
1257     return datetime
.datetime
.strptime(date_str
, '%Y%m%d').date() 
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Input that does not look like a YYYYMMDD date is returned
    unchanged instead of implicitly yielding None.
    """
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        return date_str
1270 class DateRange(object): 
1271     """Represents a time interval between two dates""" 
1273     def __init__(self
, start
=None, end
=None): 
1274         """start and end must be strings in the format accepted by date""" 
1275         if start 
is not None: 
1276             self
.start 
= date_from_str(start
) 
1278             self
.start 
= datetime
.datetime
.min.date() 
1280             self
.end 
= date_from_str(end
) 
1282             self
.end 
= datetime
.datetime
.max.date() 
1283         if self
.start 
> self
.end
: 
1284             raise ValueError('Date range: "%s" , the start date must be before the end date' % self
) 
1288         """Returns a range that only contains the given day""" 
1289         return cls(day
, day
) 
1291     def __contains__(self
, date
): 
1292         """Check if the date is in the range""" 
1293         if not isinstance(date
, datetime
.date
): 
1294             date 
= date_from_str(date
) 
1295         return self
.start 
<= date 
<= self
.end
 
1298         return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat()) 
1301 def platform_name(): 
1302     """ Returns the platform name as a compat_str """ 
1303     res 
= platform
.platform() 
1304     if isinstance(res
, bytes): 
1305         res 
= res
.decode(preferredencoding()) 
1307     assert isinstance(res
, compat_str
) 
1311 def _windows_write_string(s
, out
): 
1312     """ Returns True if the string was written using special methods, 
1313     False if it has yet to be written out.""" 
1314     # Adapted from http://stackoverflow.com/a/3259271/35070 
1317     import ctypes
.wintypes
 
1325         fileno 
= out
.fileno() 
1326     except AttributeError: 
1327         # If the output stream doesn't have a fileno, it's virtual 
1329     except io
.UnsupportedOperation
: 
1330         # Some strange Windows pseudo files? 
1332     if fileno 
not in WIN_OUTPUT_IDS
: 
1335     GetStdHandle 
= ctypes
.WINFUNCTYPE( 
1336         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1337         (b
'GetStdHandle', ctypes
.windll
.kernel32
)) 
1338     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1340     WriteConsoleW 
= ctypes
.WINFUNCTYPE( 
1341         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1342         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1343         ctypes
.wintypes
.LPVOID
)((b
'WriteConsoleW', ctypes
.windll
.kernel32
)) 
1344     written 
= ctypes
.wintypes
.DWORD(0) 
1346     GetFileType 
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
'GetFileType', ctypes
.windll
.kernel32
)) 
1347     FILE_TYPE_CHAR 
= 0x0002 
1348     FILE_TYPE_REMOTE 
= 0x8000 
1349     GetConsoleMode 
= ctypes
.WINFUNCTYPE( 
1350         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1351         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1352         (b
'GetConsoleMode', ctypes
.windll
.kernel32
)) 
1353     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1355     def not_a_console(handle
): 
1356         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1358         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1359                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1361     if not_a_console(h
): 
1364     def next_nonbmp_pos(s
): 
1366             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1367         except StopIteration: 
1371         count 
= min(next_nonbmp_pos(s
), 1024) 
1373         ret 
= WriteConsoleW( 
1374             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1376             raise OSError('Failed to write string') 
1377         if not count
:  # We just wrote a non-BMP character 
1378             assert written
.value 
== 2 
1381             assert written
.value 
> 0 
1382             s 
= s
[written
.value
:] 
1386 def write_string(s
, out
=None, encoding
=None): 
1389     assert type(s
) == compat_str
 
1391     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
1392         if _windows_write_string(s
, out
): 
1395     if ('b' in getattr(out
, 'mode', '') or 
1396             sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
1397         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
1399     elif hasattr(out
, 'buffer'): 
1400         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
1401         byt 
= s
.encode(enc
, 'ignore') 
1402         out
.buffer.write(byt
) 
1408 def bytes_to_intlist(bs
): 
1411     if isinstance(bs
[0], int):  # Python 3 
1414         return [ord(c
) for c 
in bs
] 
def intlist_to_bytes(xs):
    """Pack a sequence of byte values (ints in 0-255) into a byte string."""
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
1423 # Cross-platform file locking 
1424 if sys
.platform 
== 'win32': 
1425     import ctypes
.wintypes
 
1428     class OVERLAPPED(ctypes
.Structure
): 
1430             ('Internal', ctypes
.wintypes
.LPVOID
), 
1431             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1432             ('Offset', ctypes
.wintypes
.DWORD
), 
1433             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1434             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1437     kernel32 
= ctypes
.windll
.kernel32
 
1438     LockFileEx 
= kernel32
.LockFileEx
 
1439     LockFileEx
.argtypes 
= [ 
1440         ctypes
.wintypes
.HANDLE
,     # hFile 
1441         ctypes
.wintypes
.DWORD
,      # dwFlags 
1442         ctypes
.wintypes
.DWORD
,      # dwReserved 
1443         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1444         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1445         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1447     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1448     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1449     UnlockFileEx
.argtypes 
= [ 
1450         ctypes
.wintypes
.HANDLE
,     # hFile 
1451         ctypes
.wintypes
.DWORD
,      # dwReserved 
1452         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1453         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1454         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1456     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1457     whole_low 
= 0xffffffff 
1458     whole_high 
= 0x7fffffff 
1460     def _lock_file(f
, exclusive
): 
1461         overlapped 
= OVERLAPPED() 
1462         overlapped
.Offset 
= 0 
1463         overlapped
.OffsetHigh 
= 0 
1464         overlapped
.hEvent 
= 0 
1465         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1466         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1467         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1468                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1469             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1471     def _unlock_file(f
): 
1472         assert f
._lock
_file
_overlapped
_p
 
1473         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1474         if not UnlockFileEx(handle
, 0, 
1475                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1476             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1479     # Some platforms, such as Jython, is missing fcntl 
1483         def _lock_file(f
, exclusive
): 
1484             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1486         def _unlock_file(f
): 
1487             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1489         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1491         def _lock_file(f
, exclusive
): 
1492             raise IOError(UNSUPPORTED_MSG
) 
1494         def _unlock_file(f
): 
1495             raise IOError(UNSUPPORTED_MSG
) 
1498 class locked_file(object): 
1499     def __init__(self
, filename
, mode
, encoding
=None): 
1500         assert mode 
in ['r', 'a', 'w'] 
1501         self
.f 
= io
.open(filename
, mode
, encoding
=encoding
) 
1504     def __enter__(self
): 
1505         exclusive 
= self
.mode 
!= 'r' 
1507             _lock_file(self
.f
, exclusive
) 
1513     def __exit__(self
, etype
, value
, traceback
): 
1515             _unlock_file(self
.f
) 
1522     def write(self
, *args
): 
1523         return self
.f
.write(*args
) 
1525     def read(self
, *args
): 
1526         return self
.f
.read(*args
) 
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), falling back to 'utf-8' if unset."""
    enc = sys.getfilesystemencoding()
    return 'utf-8' if enc is None else enc
1534 def shell_quote(args
): 
1536     encoding 
= get_filesystem_encoding() 
1538         if isinstance(a
, bytes): 
1539             # We may get a filename encoded with 'encodeFilename' 
1540             a 
= a
.decode(encoding
) 
1541         quoted_args
.append(pipes
.quote(a
)) 
1542     return ' '.join(quoted_args
) 
1545 def smuggle_url(url
, data
): 
1546     """ Pass additional data in a URL for internal use. """ 
1548     url
, idata 
= unsmuggle_url(url
, {}) 
1550     sdata 
= compat_urllib_parse_urlencode( 
1551         {'__youtubedl_smuggle': json
.dumps(data
)}) 
1552     return url 
+ '#' + sdata
 
1555 def unsmuggle_url(smug_url
, default
=None): 
1556     if '#__youtubedl_smuggle' not in smug_url
: 
1557         return smug_url
, default
 
1558     url
, _
, sdata 
= smug_url
.rpartition('#') 
1559     jsond 
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0] 
1560     data 
= json
.loads(jsond
) 
1564 def format_bytes(bytes): 
1567     if type(bytes) is str: 
1568         bytes = float(bytes) 
1572         exponent 
= int(math
.log(bytes, 1024.0)) 
1573     suffix 
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
] 
1574     converted 
= float(bytes) / float(1024 ** exponent
) 
1575     return '%.2f%s' % (converted
, suffix
) 
1578 def lookup_unit_table(unit_table
, s
): 
1579     units_re 
= '|'.join(re
.escape(u
) for u 
in unit_table
) 
1581         r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
, s
) 
1584     num_str 
= m
.group('num').replace(',', '.') 
1585     mult 
= unit_table
[m
.group('unit')] 
1586     return int(float(num_str
) * mult
) 
1589 def parse_filesize(s
): 
1593     # The lower-case forms are of course incorrect and unofficial, 
1594     # but we support those too 
1611         'megabytes': 1000 ** 2, 
1612         'mebibytes': 1024 ** 2, 
1618         'gigabytes': 1000 ** 3, 
1619         'gibibytes': 1024 ** 3, 
1625         'terabytes': 1000 ** 4, 
1626         'tebibytes': 1024 ** 4, 
1632         'petabytes': 1000 ** 5, 
1633         'pebibytes': 1024 ** 5, 
1639         'exabytes': 1000 ** 6, 
1640         'exbibytes': 1024 ** 6, 
1646         'zettabytes': 1000 ** 7, 
1647         'zebibytes': 1024 ** 7, 
1653         'yottabytes': 1000 ** 8, 
1654         'yobibytes': 1024 ** 8, 
1657     return lookup_unit_table(_UNIT_TABLE
, s
) 
1666     if re
.match(r
'^[\d,.]+$', s
): 
1667         return str_to_int(s
) 
1678     return lookup_unit_table(_UNIT_TABLE
, s
) 
1681 def month_by_name(name
, lang
='en'): 
1682     """ Return the number of a month by (locale-independently) English name """ 
1684     month_names 
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en']) 
1687         return month_names
.index(name
) + 1 
1692 def month_by_abbreviation(abbrev
): 
1693     """ Return the number of a month by (locale-independently) English 
1697         return [s
[:3] for s 
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1 
1702 def fix_xml_ampersands(xml_str
): 
1703     """Replace all the '&' by '&' in XML""" 
1705         r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', 
1710 def setproctitle(title
): 
1711     assert isinstance(title
, compat_str
) 
1713     # ctypes in Jython is not complete 
1714     # http://bugs.jython.org/issue2148 
1715     if sys
.platform
.startswith('java'): 
1719         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
1723         # LoadLibrary in Windows Python 2.7.13 only expects 
1724         # a bytestring, but since unicode_literals turns 
1725         # every string into a unicode string, it fails. 
1727     title_bytes 
= title
.encode('utf-8') 
1728     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1729     buf
.value 
= title_bytes
 
1731         libc
.prctl(15, buf
, 0, 0, 0) 
1732     except AttributeError: 
1733         return  # Strange libc, just skip this 
def remove_start(s, start):
    """Strip the prefix *start* from *s* if present; None passes through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Strip the suffix *end* from *s* if present; None passes through.

    Fixes the empty-suffix corner case: the previous one-liner computed
    s[:-len(end)], which for len(end) == 0 is s[:-0] == s[:0] == '' and
    silently destroyed the whole string.
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
1744 def remove_quotes(s
): 
1745     if s 
is None or len(s
) < 2: 
1747     for quote 
in ('"', "'", ): 
1748         if s
[0] == quote 
and s
[-1] == quote
: 
def url_basename(url):
    """Return the last path segment of *url* (query/fragment excluded)."""
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
1759     return re
.match(r
'https?://[^?#&]+/', url
).group() 
1762 def urljoin(base
, path
): 
1763     if isinstance(path
, bytes): 
1764         path 
= path
.decode('utf-8') 
1765     if not isinstance(path
, compat_str
) or not path
: 
1767     if re
.match(r
'^(?:https?:)?//', path
): 
1769     if isinstance(base
, bytes): 
1770         base 
= base
.decode('utf-8') 
1771     if not isinstance(base
, compat_str
) or not re
.match( 
1772             r
'^(?:https?:)?//', base
): 
1774     return compat_urlparse
.urljoin(base
, path
) 
1777 class HEADRequest(compat_urllib_request
.Request
): 
1778     def get_method(self
): 
1782 class PUTRequest(compat_urllib_request
.Request
): 
1783     def get_method(self
): 
1787 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1): 
1790             v 
= getattr(v
, get_attr
, None) 
1796         return int(v
) * invscale 
// scale
 
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, or return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
1805 def str_to_int(int_str
): 
1806     """ A more relaxed version of int_or_none """ 
1809     int_str 
= re
.sub(r
'[,\.\+]', '', int_str
) 
1813 def float_or_none(v
, scale
=1, invscale
=1, default
=None): 
1817         return float(v
) * invscale 
/ scale
 
def strip_or_none(v):
    """str.strip() that tolerates a None input."""
    if v is None:
        return None
    return v.strip()
1826 def parse_duration(s
): 
1827     if not isinstance(s
, compat_basestring
): 
1832     days
, hours
, mins
, secs
, ms 
= [None] * 5 
1833     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
) 
1835         days
, hours
, mins
, secs
, ms 
= m
.groups() 
1840                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
1843                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
1846                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
1849                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
1852             days
, hours
, mins
, secs
, ms 
= m
.groups() 
1854             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
) 
1856                 hours
, mins 
= m
.groups() 
1862         duration 
+= float(secs
) 
1864         duration 
+= float(mins
) * 60 
1866         duration 
+= float(hours
) * 60 * 60 
1868         duration 
+= float(days
) * 24 * 60 * 60 
1870         duration 
+= float(ms
) 
1874 def prepend_extension(filename
, ext
, expected_real_ext
=None): 
1875     name
, real_ext 
= os
.path
.splitext(filename
) 
1877         '{0}.{1}{2}'.format(name
, ext
, real_ext
) 
1878         if not expected_real_ext 
or real_ext
[1:] == expected_real_ext
 
1879         else '{0}.{1}'.format(filename
, ext
)) 
1882 def replace_extension(filename
, ext
, expected_real_ext
=None): 
1883     name
, real_ext 
= os
.path
.splitext(filename
) 
1884     return '{0}.{1}'.format( 
1885         name 
if not expected_real_ext 
or real_ext
[1:] == expected_real_ext 
else filename
, 
1889 def check_executable(exe
, args
=[]): 
1890     """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
1891     args can be a list of arguments for a short output (like -version) """ 
1893         subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate() 
1899 def get_exe_version(exe
, args
=['--version'], 
1900                     version_re
=None, unrecognized
='present'): 
1901     """ Returns the version of the specified executable, 
1902     or False if the executable is not present """ 
1904         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers 
1905         # SIGTTOU if youtube-dl is run in the background. 
1906         # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656 
1907         out
, _ 
= subprocess
.Popen( 
1908             [encodeArgument(exe
)] + args
, 
1909             stdin
=subprocess
.PIPE
, 
1910             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1913     if isinstance(out
, bytes):  # Python 2.x 
1914         out 
= out
.decode('ascii', 'ignore') 
1915     return detect_exe_version(out
, version_re
, unrecognized
) 
1918 def detect_exe_version(output
, version_re
=None, unrecognized
='present'): 
1919     assert isinstance(output
, compat_str
) 
1920     if version_re 
is None: 
1921         version_re 
= r
'version\s+([-0-9._a-zA-Z]+)' 
1922     m 
= re
.search(version_re
, output
) 
1929 class PagedList(object): 
1931         # This is only useful for tests 
1932         return len(self
.getslice()) 
1935 class OnDemandPagedList(PagedList
): 
1936     def __init__(self
, pagefunc
, pagesize
, use_cache
=False): 
1937         self
._pagefunc 
= pagefunc
 
1938         self
._pagesize 
= pagesize
 
1939         self
._use
_cache 
= use_cache
 
1943     def getslice(self
, start
=0, end
=None): 
1945         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
1946             firstid 
= pagenum 
* self
._pagesize
 
1947             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
1948             if start 
>= nextfirstid
: 
1953                 page_results 
= self
._cache
.get(pagenum
) 
1954             if page_results 
is None: 
1955                 page_results 
= list(self
._pagefunc
(pagenum
)) 
1957                 self
._cache
[pagenum
] = page_results
 
1960                 start 
% self
._pagesize
 
1961                 if firstid 
<= start 
< nextfirstid
 
1965                 ((end 
- 1) % self
._pagesize
) + 1 
1966                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
1969             if startv 
!= 0 or endv 
is not None: 
1970                 page_results 
= page_results
[startv
:endv
] 
1971             res
.extend(page_results
) 
1973             # A little optimization - if current page is not "full", ie. does 
1974             # not contain page_size videos then we can assume that this page 
1975             # is the last one - there are no more ids on further pages - 
1976             # i.e. no need to query again. 
1977             if len(page_results
) + startv 
< self
._pagesize
: 
1980             # If we got the whole page, but the next page is not interesting, 
1981             # break out early as well 
1982             if end 
== nextfirstid
: 
1987 class InAdvancePagedList(PagedList
): 
1988     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
1989         self
._pagefunc 
= pagefunc
 
1990         self
._pagecount 
= pagecount
 
1991         self
._pagesize 
= pagesize
 
1993     def getslice(self
, start
=0, end
=None): 
1995         start_page 
= start 
// self
._pagesize
 
1997             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
1998         skip_elems 
= start 
- start_page 
* self
._pagesize
 
1999         only_more 
= None if end 
is None else end 
- start
 
2000         for pagenum 
in range(start_page
, end_page
): 
2001             page 
= list(self
._pagefunc
(pagenum
)) 
2003                 page 
= page
[skip_elems
:] 
2005             if only_more 
is not None: 
2006                 if len(page
) < only_more
: 
2007                     only_more 
-= len(page
) 
2009                     page 
= page
[:only_more
] 
2016 def uppercase_escape(s
): 
2017     unicode_escape 
= codecs
.getdecoder('unicode_escape') 
2019         r
'\\U[0-9a-fA-F]{8}', 
2020         lambda m
: unicode_escape(m
.group(0))[0], 
2024 def lowercase_escape(s
): 
2025     unicode_escape 
= codecs
.getdecoder('unicode_escape') 
2027         r
'\\u[0-9a-fA-F]{4}', 
2028         lambda m
: unicode_escape(m
.group(0))[0], 
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986."""
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    # Python 2's quote() expects a byte string for non-ASCII input.
    needs_encode = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encode:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, safe_chars)
2039 def escape_url(url
): 
2040     """Escape URL as suggested by RFC 3986""" 
2041     url_parsed 
= compat_urllib_parse_urlparse(url
) 
2042     return url_parsed
._replace
( 
2043         netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'), 
2044         path
=escape_rfc3986(url_parsed
.path
), 
2045         params
=escape_rfc3986(url_parsed
.params
), 
2046         query
=escape_rfc3986(url_parsed
.query
), 
2047         fragment
=escape_rfc3986(url_parsed
.fragment
) 
2051 def read_batch_urls(batch_fd
): 
2053         if not isinstance(url
, compat_str
): 
2054             url 
= url
.decode('utf-8', 'replace') 
2055         BOM_UTF8 
= '\xef\xbb\xbf' 
2056         if url
.startswith(BOM_UTF8
): 
2057             url 
= url
[len(BOM_UTF8
):] 
2059         if url
.startswith(('#', ';', ']')): 
2063     with contextlib
.closing(batch_fd
) as fd
: 
2064         return [url 
for url 
in map(fixup
, fd
) if url
] 
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
2071 def update_url_query(url
, query
): 
2074     parsed_url 
= compat_urlparse
.urlparse(url
) 
2075     qs 
= compat_parse_qs(parsed_url
.query
) 
2077     return compat_urlparse
.urlunparse(parsed_url
._replace
( 
2078         query
=compat_urllib_parse_urlencode(qs
, True))) 
2081 def update_Request(req
, url
=None, data
=None, headers
={}, query
={}): 
2082     req_headers 
= req
.headers
.copy() 
2083     req_headers
.update(headers
) 
2084     req_data 
= data 
or req
.data
 
2085     req_url 
= update_url_query(url 
or req
.get_full_url(), query
) 
2086     req_get_method 
= req
.get_method() 
2087     if req_get_method 
== 'HEAD': 
2088         req_type 
= HEADRequest
 
2089     elif req_get_method 
== 'PUT': 
2090         req_type 
= PUTRequest
 
2092         req_type 
= compat_urllib_request
.Request
 
2094         req_url
, data
=req_data
, headers
=req_headers
, 
2095         origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
) 
2096     if hasattr(req
, 'timeout'): 
2097         new_req
.timeout 
= req
.timeout
 
2101 def _multipart_encode_impl(data
, boundary
): 
2102     content_type 
= 'multipart/form-data; boundary=%s' % boundary
 
2105     for k
, v 
in data
.items(): 
2106         out 
+= b
'--' + boundary
.encode('ascii') + b
'\r\n' 
2107         if isinstance(k
, compat_str
): 
2108             k 
= k
.encode('utf-8') 
2109         if isinstance(v
, compat_str
): 
2110             v 
= v
.encode('utf-8') 
2111         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 
2112         # suggests sending UTF-8 directly. Firefox sends UTF-8, too 
2113         content 
= b
'Content-Disposition: form-data; name="' + k 
+ b
'"\r\n\r\n' + v 
+ b
'\r\n' 
2114         if boundary
.encode('ascii') in content
: 
2115             raise ValueError('Boundary overlaps with data') 
2118     out 
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n' 
2120     return out
, content_type
 
2123 def multipart_encode(data
, boundary
=None): 
2125     Encode a dict to RFC 7578-compliant form-data 
2128         A dict where keys and values can be either Unicode or bytes-like 
2131         If specified a Unicode object, it's used as the boundary. Otherwise 
2132         a random boundary is generated. 
2134     Reference: https://tools.ietf.org/html/rfc7578 
2136     has_specified_boundary 
= boundary 
is not None 
2139         if boundary 
is None: 
2140             boundary 
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff)) 
2143             out
, content_type 
= _multipart_encode_impl(data
, boundary
) 
2146             if has_specified_boundary
: 
2150     return out
, content_type
 
2153 def dict_get(d
, key_or_keys
, default
=None, skip_false_values
=True): 
2154     if isinstance(key_or_keys
, (list, tuple)): 
2155         for key 
in key_or_keys
: 
2156             if key 
not in d 
or d
[key
] is None or skip_false_values 
and not d
[key
]: 
2160     return d
.get(key_or_keys
, default
) 
2163 def try_get(src
, getter
, expected_type
=None): 
2164     if not isinstance(getter
, (list, tuple)): 
2169         except (AttributeError, KeyError, TypeError, IndexError): 
2172             if expected_type 
is None or isinstance(v
, expected_type
): 
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode *string* to compat_str unless it already is one.

    NOTE: the default *encoding* is evaluated once at import time, as
    in the original definition.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2189 TV_PARENTAL_GUIDELINES 
= { 
2199 def parse_age_limit(s
): 
2201         return s 
if 0 <= s 
<= 21 else None 
2202     if not isinstance(s
, compat_basestring
): 
2204     m 
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
) 
2206         return int(m
.group('age')) 
2208         return US_RATINGS
[s
] 
2209     return TV_PARENTAL_GUIDELINES
.get(s
) 
def strip_jsonp(code):
    """Strip a JSONP wrapper (callback name, surrounding parentheses,
    trailing ';' and line comments), leaving the bare JSON payload."""
    jsonp_re = r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_re, r'\1', code)
2217 def js_to_json(code
): 
2218     COMMENT_RE 
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*' 
2219     SKIP_RE 
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
) 
2221         (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16), 
2222         (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8), 
2227         if v 
in ('true', 'false', 'null'): 
2229         elif v
.startswith('/*') or v
.startswith('//') or v 
== ',': 
2232         if v
[0] in ("'", '"'): 
2233             v 
= re
.sub(r
'(?s)\\.|"', lambda m
: { 
2238             }.get(m
.group(0), m
.group(0)), v
[1:-1]) 
2240         for regex
, base 
in INTEGER_TABLE
: 
2241             im 
= re
.match(regex
, v
) 
2243                 i 
= int(im
.group(1), base
) 
2244                 return '"%d":' % i 
if v
.endswith(':') else '%d' % i
 
2248     return re
.sub(r
'''(?sx) 
2249         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| 
2250         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| 
2251         {comment}|,(?={skip}[\]}}])| 
2252         [a-zA-Z_][.a-zA-Z_0-9]*| 
2253         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| 
2255         '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
) 
2258 def qualities(quality_ids
): 
2259     """ Get a numeric quality value out of a list of possible values """ 
2262             return quality_ids
.index(qid
) 
2268 DEFAULT_OUTTMPL 
= '%(title)s-%(id)s.%(ext)s' 
2271 def limit_length(s
, length
): 
2272     """ Add ellipses to overly long strings """ 
2277         return s
[:length 
- len(ELLIPSES
)] + ELLIPSES
 
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
2285 def is_outdated_version(version
, limit
, assume_new
=True): 
2287         return not assume_new
 
2289         return version_tuple(version
) < version_tuple(limit
) 
2291         return not assume_new
 
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    return ' '.join(map(compat_shlex_quote, args))
def error_to_compat_str(err):
    """Return a text representation of an exception, safe on Python 2 and 3."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
def mimetype2ext(mt):
    """Map a MIME type to a file extension; returns None for None input.

    Unknown subtypes fall through to the (lowercased) subtype itself.
    NOTE(review): part of the mapping tables was reconstructed — verify
    entries against the project history.
    """
    if mt is None:
        return None

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')
    # Drop MIME parameters (e.g. "; charset=...") and normalize case
    res = res.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'mp4': 'mp4',
        'aacp': 'aac',
        'flac': 'flac',
        'ism': 'ism',
        'onc': 'json',
        'vnd.ms-sstr+xml': 'ism',
    }.get(res, res)
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into 'vcodec'/'acodec' fields.

    Returns {} when nothing could be determined; unset fields become 'none'.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    splited_codecs = list(filter(None, map(
        lambda s: s.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in splited_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        if len(splited_codecs) == 2:
            # Two unrecognized entries: assume conventional video+audio order
            return {
                'vcodec': splited_codecs[0],
                'acodec': splited_codecs[1],
            }
        elif len(splited_codecs) == 1:
            # NOTE(review): single unrecognized codec assumed to be audio-only
            # — confirm against callers
            return {
                'vcodec': 'none',
                'acodec': splited_codecs[0],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a URL response handle.

    Prefers the filename in Content-Disposition; falls back to mapping
    the Content-Type header.
    """
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build a base64 data: URI for the given bytes and MIME type."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Known byte-order marks, longest first so prefixes don't shadow
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    # Truthy (a match object) iff the text starts with '<' after whitespace
    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Infer the download protocol for an info dict.

    Uses an explicit 'protocol' field when present, then URL prefixes,
    then the file extension, then the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    max_lens = []
    for col in zip(*table):
        max_lens.append(max(len(compat_str(cell)) for cell in col))
    # Left-pad every column but the last to its maximum width plus one space
    pieces = ['%-' + compat_str(width + 1) + 's' for width in max_lens[:-1]]
    format_str = ' '.join(pieces) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)
def _match_one(filter_part, dct):
    """Evaluate a single filter expression (e.g. "duration > 60") against dct.

    Supports comparison operators with int/filesize/string values and the
    unary presence operators '' and '!'. Raises ValueError for invalid parts.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/rg3/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters inside quoted values
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try to parse as a file size (e.g. 500KiB)
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing keys only pass when the '?' suffix was given
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # '&'-separated parts must all match
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    """Build a match-filter callable: returns None when the video passes,
    otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds (float), or None."""
    if not time_expr:
        return

    # Plain offset, optionally suffixed with 's' (e.g. "12.3s")
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time HH:MM:SS with optional '.frac' or ':frames' tail
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode (HH:MM:SS,mmm)."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
def dfxp2srt(dfxp_data):
    """Convert DFXP/TTML subtitle markup (a str) into SRT text.

    NOTE(review): elided sections reconstructed from the project's canonical
    implementation — verify against the original file.
    """
    # Older TTML namespaces are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        ('http://www.w3.org/ns/ttml', [
            'http://www.w3.org/2004/11/ttaf1',
            'http://www.w3.org/2006/04/ttaf1',
            'http://www.w3.org/2006/10/ttaf1',
        ]),
        ('http://www.w3.org/ns/ttml#styling', [
            'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat until all parent styles are known
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id')
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styles attached to body/div apply as document-wide defaults
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param):
    """Return [command_option, value] for a set option, else []."""
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as CLI arguments.

    With `separator`, a single joined argument is produced
    (e.g. '--flag=true'); otherwise option and value are separate items.
    """
    param = params.get(param)
    assert isinstance(param, bool)
    if separator:
        return [command_option + separator + (true_value if param else false_value)]
    return [command_option, true_value if param else false_value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    value = params.get(param)
    if value == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra args stored under `param`, or `default`.

    NOTE: the mutable default is shared across calls but is only returned,
    never mutated here — callers must not mutate the result in place.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
2754 class ISO639Utils(object): 
2755     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
2944     def short2long(cls, code): 
2945         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
2946         return cls._lang_map.get(code[:2]) 
2949     def long2short(cls, code): 
2950         """Convert language code from ISO 639-2/T to ISO 639-1""" 
2951         for short_name, long_name in cls._lang_map.items(): 
2952             if long_name == code: 
2956 class ISO3166Utils(object): 
2957     # From http://data.okfn.org/data/core/country-list 
2959         'AF': 'Afghanistan', 
2960         'AX': 'Åland Islands', 
2963         'AS': 'American Samoa', 
2968         'AG': 'Antigua and Barbuda', 
2985         'BO': 'Bolivia, Plurinational State of', 
2986         'BQ': 'Bonaire, Sint Eustatius and Saba', 
2987         'BA': 'Bosnia and Herzegovina', 
2989         'BV': 'Bouvet Island', 
2991         'IO': 'British Indian Ocean Territory', 
2992         'BN': 'Brunei Darussalam', 
2994         'BF': 'Burkina Faso', 
3000         'KY': 'Cayman Islands', 
3001         'CF': 'Central African Republic', 
3005         'CX': 'Christmas Island', 
3006         'CC': 'Cocos (Keeling) Islands', 
3010         'CD': 'Congo, the Democratic Republic of the', 
3011         'CK': 'Cook Islands', 
3013         'CI': 'Côte d\'Ivoire', 
3018         'CZ': 'Czech Republic', 
3022         'DO': 'Dominican Republic', 
3025         'SV': 'El Salvador', 
3026         'GQ': 'Equatorial Guinea', 
3030         'FK': 'Falkland Islands (Malvinas)', 
3031         'FO': 'Faroe Islands', 
3035         'GF': 'French Guiana', 
3036         'PF': 'French Polynesia', 
3037         'TF': 'French Southern Territories', 
3052         'GW': 'Guinea-Bissau', 
3055         'HM': 'Heard Island and McDonald Islands', 
3056         'VA': 'Holy See (Vatican City State)', 
3063         'IR': 'Iran, Islamic Republic of', 
3066         'IM': 'Isle of Man', 
3076         'KP': 'Korea, Democratic People\'s Republic of', 
3077         'KR': 'Korea, Republic of', 
3080         'LA': 'Lao People\'s Democratic Republic', 
3086         'LI': 'Liechtenstein', 
3090         'MK': 'Macedonia, the Former Yugoslav Republic of', 
3097         'MH': 'Marshall Islands', 
3103         'FM': 'Micronesia, Federated States of', 
3104         'MD': 'Moldova, Republic of', 
3115         'NL': 'Netherlands', 
3116         'NC': 'New Caledonia', 
3117         'NZ': 'New Zealand', 
3122         'NF': 'Norfolk Island', 
3123         'MP': 'Northern Mariana Islands', 
3128         'PS': 'Palestine, State of', 
3130         'PG': 'Papua New Guinea', 
3133         'PH': 'Philippines', 
3137         'PR': 'Puerto Rico', 
3141         'RU': 'Russian Federation', 
3143         'BL': 'Saint Barthélemy', 
3144         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
3145         'KN': 'Saint Kitts and Nevis', 
3146         'LC': 'Saint Lucia', 
3147         'MF': 'Saint Martin (French part)', 
3148         'PM': 'Saint Pierre and Miquelon', 
3149         'VC': 'Saint Vincent and the Grenadines', 
3152         'ST': 'Sao Tome and Principe', 
3153         'SA': 'Saudi Arabia', 
3157         'SL': 'Sierra Leone', 
3159         'SX': 'Sint Maarten (Dutch part)', 
3162         'SB': 'Solomon Islands', 
3164         'ZA': 'South Africa', 
3165         'GS': 'South Georgia and the South Sandwich Islands', 
3166         'SS': 'South Sudan', 
3171         'SJ': 'Svalbard and Jan Mayen', 
3174         'CH': 'Switzerland', 
3175         'SY': 'Syrian Arab Republic', 
3176         'TW': 'Taiwan, Province of China', 
3178         'TZ': 'Tanzania, United Republic of', 
3180         'TL': 'Timor-Leste', 
3184         'TT': 'Trinidad and Tobago', 
3187         'TM': 'Turkmenistan', 
3188         'TC': 'Turks and Caicos Islands', 
3192         'AE': 'United Arab Emirates', 
3193         'GB': 'United Kingdom', 
3194         'US': 'United States', 
3195         'UM': 'United States Minor Outlying Islands', 
3199         'VE': 'Venezuela, Bolivarian Republic of', 
3201         'VG': 'Virgin Islands, British', 
3202         'VI': 'Virgin Islands, U.S.', 
3203         'WF': 'Wallis and Futuna', 
3204         'EH': 'Western Sahara', 
3211     def short2full(cls, code): 
3212         """Convert an ISO 3166-2 country code to the corresponding full name""" 
3213         return cls._country_map.get(code.upper()) 
3216 class GeoUtils(object): 
3217     # Major IPv4 address blocks per country 
3219         'AD': '85.94.160.0/19', 
3220         'AE': '94.200.0.0/13', 
3221         'AF': '149.54.0.0/17', 
3222         'AG': '209.59.64.0/18', 
3223         'AI': '204.14.248.0/21', 
3224         'AL': '46.99.0.0/16', 
3225         'AM': '46.70.0.0/15', 
3226         'AO': '105.168.0.0/13', 
3227         'AP': '159.117.192.0/21', 
3228         'AR': '181.0.0.0/12', 
3229         'AS': '202.70.112.0/20', 
3230         'AT': '84.112.0.0/13', 
3231         'AU': '1.128.0.0/11', 
3232         'AW': '181.41.0.0/18', 
3233         'AZ': '5.191.0.0/16', 
3234         'BA': '31.176.128.0/17', 
3235         'BB': '65.48.128.0/17', 
3236         'BD': '114.130.0.0/16', 
3238         'BF': '129.45.128.0/17', 
3239         'BG': '95.42.0.0/15', 
3240         'BH': '37.131.0.0/17', 
3241         'BI': '154.117.192.0/18', 
3242         'BJ': '137.255.0.0/16', 
3243         'BL': '192.131.134.0/24', 
3244         'BM': '196.12.64.0/18', 
3245         'BN': '156.31.0.0/16', 
3246         'BO': '161.56.0.0/16', 
3247         'BQ': '161.0.80.0/20', 
3248         'BR': '152.240.0.0/12', 
3249         'BS': '24.51.64.0/18', 
3250         'BT': '119.2.96.0/19', 
3251         'BW': '168.167.0.0/16', 
3252         'BY': '178.120.0.0/13', 
3253         'BZ': '179.42.192.0/18', 
3254         'CA': '99.224.0.0/11', 
3255         'CD': '41.243.0.0/16', 
3256         'CF': '196.32.200.0/21', 
3257         'CG': '197.214.128.0/17', 
3258         'CH': '85.0.0.0/13', 
3259         'CI': '154.232.0.0/14', 
3260         'CK': '202.65.32.0/19', 
3261         'CL': '152.172.0.0/14', 
3262         'CM': '165.210.0.0/15', 
3263         'CN': '36.128.0.0/10', 
3264         'CO': '181.240.0.0/12', 
3265         'CR': '201.192.0.0/12', 
3266         'CU': '152.206.0.0/15', 
3267         'CV': '165.90.96.0/19', 
3268         'CW': '190.88.128.0/17', 
3269         'CY': '46.198.0.0/15', 
3270         'CZ': '88.100.0.0/14', 
3272         'DJ': '197.241.0.0/17', 
3273         'DK': '87.48.0.0/12', 
3274         'DM': '192.243.48.0/20', 
3275         'DO': '152.166.0.0/15', 
3276         'DZ': '41.96.0.0/12', 
3277         'EC': '186.68.0.0/15', 
3278         'EE': '90.190.0.0/15', 
3279         'EG': '156.160.0.0/11', 
3280         'ER': '196.200.96.0/20', 
3281         'ES': '88.0.0.0/11', 
3282         'ET': '196.188.0.0/14', 
3283         'EU': '2.16.0.0/13', 
3284         'FI': '91.152.0.0/13', 
3285         'FJ': '144.120.0.0/16', 
3286         'FM': '119.252.112.0/20', 
3287         'FO': '88.85.32.0/19', 
3289         'GA': '41.158.0.0/15', 
3291         'GD': '74.122.88.0/21', 
3292         'GE': '31.146.0.0/16', 
3293         'GF': '161.22.64.0/18', 
3294         'GG': '62.68.160.0/19', 
3295         'GH': '45.208.0.0/14', 
3296         'GI': '85.115.128.0/19', 
3297         'GL': '88.83.0.0/19', 
3298         'GM': '160.182.0.0/15', 
3299         'GN': '197.149.192.0/18', 
3300         'GP': '104.250.0.0/19', 
3301         'GQ': '105.235.224.0/20', 
3302         'GR': '94.64.0.0/13', 
3303         'GT': '168.234.0.0/16', 
3304         'GU': '168.123.0.0/16', 
3305         'GW': '197.214.80.0/20', 
3306         'GY': '181.41.64.0/18', 
3307         'HK': '113.252.0.0/14', 
3308         'HN': '181.210.0.0/16', 
3309         'HR': '93.136.0.0/13', 
3310         'HT': '148.102.128.0/17', 
3311         'HU': '84.0.0.0/14', 
3312         'ID': '39.192.0.0/10', 
3313         'IE': '87.32.0.0/12', 
3314         'IL': '79.176.0.0/13', 
3315         'IM': '5.62.80.0/20', 
3316         'IN': '117.192.0.0/10', 
3317         'IO': '203.83.48.0/21', 
3318         'IQ': '37.236.0.0/14', 
3319         'IR': '2.176.0.0/12', 
3320         'IS': '82.221.0.0/16', 
3321         'IT': '79.0.0.0/10', 
3322         'JE': '87.244.64.0/18', 
3323         'JM': '72.27.0.0/17', 
3324         'JO': '176.29.0.0/16', 
3325         'JP': '126.0.0.0/8', 
3326         'KE': '105.48.0.0/12', 
3327         'KG': '158.181.128.0/17', 
3328         'KH': '36.37.128.0/17', 
3329         'KI': '103.25.140.0/22', 
3330         'KM': '197.255.224.0/20', 
3331         'KN': '198.32.32.0/19', 
3332         'KP': '175.45.176.0/22', 
3333         'KR': '175.192.0.0/10', 
3334         'KW': '37.36.0.0/14', 
3335         'KY': '64.96.0.0/15', 
3336         'KZ': '2.72.0.0/13', 
3337         'LA': '115.84.64.0/18', 
3338         'LB': '178.135.0.0/16', 
3339         'LC': '192.147.231.0/24', 
3340         'LI': '82.117.0.0/19', 
3341         'LK': '112.134.0.0/15', 
3342         'LR': '41.86.0.0/19', 
3343         'LS': '129.232.0.0/17', 
3344         'LT': '78.56.0.0/13', 
3345         'LU': '188.42.0.0/16', 
3346         'LV': '46.109.0.0/16', 
3347         'LY': '41.252.0.0/14', 
3348         'MA': '105.128.0.0/11', 
3349         'MC': '88.209.64.0/18', 
3350         'MD': '37.246.0.0/16', 
3351         'ME': '178.175.0.0/17', 
3352         'MF': '74.112.232.0/21', 
3353         'MG': '154.126.0.0/17', 
3354         'MH': '117.103.88.0/21', 
3355         'MK': '77.28.0.0/15', 
3356         'ML': '154.118.128.0/18', 
3357         'MM': '37.111.0.0/17', 
3358         'MN': '49.0.128.0/17', 
3359         'MO': '60.246.0.0/16', 
3360         'MP': '202.88.64.0/20', 
3361         'MQ': '109.203.224.0/19', 
3362         'MR': '41.188.64.0/18', 
3363         'MS': '208.90.112.0/22', 
3364         'MT': '46.11.0.0/16', 
3365         'MU': '105.16.0.0/12', 
3366         'MV': '27.114.128.0/18', 
3367         'MW': '105.234.0.0/16', 
3368         'MX': '187.192.0.0/11', 
3369         'MY': '175.136.0.0/13', 
3370         'MZ': '197.218.0.0/15', 
3371         'NA': '41.182.0.0/16', 
3372         'NC': '101.101.0.0/18', 
3373         'NE': '197.214.0.0/18', 
3374         'NF': '203.17.240.0/22', 
3375         'NG': '105.112.0.0/12', 
3376         'NI': '186.76.0.0/15', 
3377         'NL': '145.96.0.0/11', 
3378         'NO': '84.208.0.0/13', 
3379         'NP': '36.252.0.0/15', 
3380         'NR': '203.98.224.0/19', 
3381         'NU': '49.156.48.0/22', 
3382         'NZ': '49.224.0.0/14', 
3383         'OM': '5.36.0.0/15', 
3384         'PA': '186.72.0.0/15', 
3385         'PE': '186.160.0.0/14', 
3386         'PF': '123.50.64.0/18', 
3387         'PG': '124.240.192.0/19', 
3388         'PH': '49.144.0.0/13', 
3389         'PK': '39.32.0.0/11', 
3390         'PL': '83.0.0.0/11', 
3391         'PM': '70.36.0.0/20', 
3392         'PR': '66.50.0.0/16', 
3393         'PS': '188.161.0.0/16', 
3394         'PT': '85.240.0.0/13', 
3395         'PW': '202.124.224.0/20', 
3396         'PY': '181.120.0.0/14', 
3397         'QA': '37.210.0.0/15', 
3398         'RE': '139.26.0.0/16', 
3399         'RO': '79.112.0.0/13', 
3400         'RS': '178.220.0.0/14', 
3401         'RU': '5.136.0.0/13', 
3402         'RW': '105.178.0.0/15', 
3403         'SA': '188.48.0.0/13', 
3404         'SB': '202.1.160.0/19', 
3405         'SC': '154.192.0.0/11', 
3406         'SD': '154.96.0.0/13', 
3407         'SE': '78.64.0.0/12', 
3408         'SG': '152.56.0.0/14', 
3409         'SI': '188.196.0.0/14', 
3410         'SK': '78.98.0.0/15', 
3411         'SL': '197.215.0.0/17', 
3412         'SM': '89.186.32.0/19', 
3413         'SN': '41.82.0.0/15', 
3414         'SO': '197.220.64.0/19', 
3415         'SR': '186.179.128.0/17', 
3416         'SS': '105.235.208.0/21', 
3417         'ST': '197.159.160.0/19', 
3418         'SV': '168.243.0.0/16', 
3419         'SX': '190.102.0.0/20', 
3421         'SZ': '41.84.224.0/19', 
3422         'TC': '65.255.48.0/20', 
3423         'TD': '154.68.128.0/19', 
3424         'TG': '196.168.0.0/14', 
3425         'TH': '171.96.0.0/13', 
3426         'TJ': '85.9.128.0/18', 
3427         'TK': '27.96.24.0/21', 
3428         'TL': '180.189.160.0/20', 
3429         'TM': '95.85.96.0/19', 
3430         'TN': '197.0.0.0/11', 
3431         'TO': '175.176.144.0/21', 
3432         'TR': '78.160.0.0/11', 
3433         'TT': '186.44.0.0/15', 
3434         'TV': '202.2.96.0/19', 
3435         'TW': '120.96.0.0/11', 
3436         'TZ': '156.156.0.0/14', 
3437         'UA': '93.72.0.0/13', 
3438         'UG': '154.224.0.0/13', 
3440         'UY': '167.56.0.0/13', 
3441         'UZ': '82.215.64.0/18', 
3442         'VA': '212.77.0.0/19', 
3443         'VC': '24.92.144.0/20', 
3444         'VE': '186.88.0.0/13', 
3445         'VG': '172.103.64.0/18', 
3446         'VI': '146.226.0.0/16', 
3447         'VN': '14.160.0.0/11', 
3448         'VU': '202.80.32.0/20', 
3449         'WF': '117.20.32.0/21', 
3450         'WS': '202.4.32.0/19', 
3451         'YE': '134.35.0.0/16', 
3452         'YT': '41.242.116.0/22', 
3453         'ZA': '41.0.0.0/11', 
3454         'ZM': '165.56.0.0/13', 
3455         'ZW': '41.85.192.0/19', 
3459     def random_ipv4(cls, code): 
3460         block = cls._country_ip_map.get(code.upper()) 
3463         addr, preflen = block.split('/') 
3464         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] 
3465         addr_max = addr_min | (0xffffffff >> int(preflen)) 
3466         return compat_str(socket.inet_ntoa( 
3467             compat_struct_pack('!L', random.randint(addr_min, addr_max)))) 
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler honoring a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
3495 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 
3496 # released into Public Domain 
3497 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes.  this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    if length % 4:
        # Left-pad with zeros to a multiple of 4 bytes
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    """Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    data: bytes-like object to encrypt (single block only)
    exponent, modulus: RSA parameters e and N, both integers
    Returns the encrypted value as a lowercase hex string.
    """
    # The plaintext bytes are interpreted little-endian (hence the reversal)
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext_int = int(reversed_hex, 16)
    ciphertext_int = pow(plaintext_int, exponent, modulus)
    return '%x' % ciphertext_int
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Per RFC 8017 (EME-PKCS1-v1_5) the padding string PS must consist of
    # NONZERO octets; randint(1, 255) guarantees that (the previous
    # randint(0, 254) could emit a 0, corrupting the padding boundary).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Render the non-negative integer num as a string in base n.

    When no table is supplied, digits come from 0-9a-zA-Z (so n may be at
    most 62); a custom digit table may be passed for other alphabets.
    Raises ValueError when the table has fewer than n entries.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse.
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with a p.a.c.k.e.r.-style wrapper.

    PACKED_CODES_RE captures the packed source, the numeric base, the
    symbol count and the '|'-separated symbol list; every word token in the
    packed source is a base-N index that is substituted back from the
    symbol table.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each index's base-N spelling to its symbol; an empty symbol means
    # the token stands for itself.
    symbol_table = {}
    for index in range(count - 1, -1, -1):
        spelled = encode_base_n(index, base)
        symbol_table[spelled] = symbols[index] or spelled

    return re.sub(
        r'\b(\w+)\b', lambda match: symbol_table[match.group(0)],
        obfuscated_code)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list string into a dict.

    Values may be double-quoted (quotes are stripped, and such values may
    contain commas) or bare tokens terminated by a comma or end of string.
    """
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs
    }
def urshift(val, n):
    """Logical right shift: shift val right by n bits, interpreting a
    negative val as its unsigned 32-bit two's-complement equivalent."""
    if val < 0:
        val += 0x100000000
    return val >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    # Decode a PNG byte string into (width, height, pixels), where pixels is
    # a list of rows and each row is a flat list of 8-bit color components
    # (stride = width * 3, i.e. 3 bytes per pixel).
    # NOTE(review): the IHDR bit-depth/color-type/interlace fields are never
    # inspected, so only non-interlaced 8-bit truecolor images are decoded
    # correctly -- confirm callers only feed such PNGs.
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # The 8-byte PNG signature must be present and IHDR must be the first
    # chunk (its type sits at bytes 4-8 of the first chunk header).
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian integer reader keyed by field width (1, 2 or 4 bytes).
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Chunk layout: 4-byte length, 4-byte type, <length> bytes of data,
    # 4-byte CRC (the CRC is skipped, not verified).
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR (first chunk, checked above) starts with the 32-bit width and
    # height fields.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # The image data may be split across several IDAT chunks; concatenated
    # they form a single zlib stream.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Each scanline is one filter-type byte followed by stride bytes of
    # filtered pixel data.
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Fetch an already-reconstructed color component by flat index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbors used by the reconstruction filters; "- 3" is one
            # whole pixel to the left (3 bytes per pixel).
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                # Paeth predictor: pick whichever of left (a), up (b) and
                # upper-left (c) is closest to a + b - c.
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set the extended filesystem attribute key (a bytes value) on path.

    Backends are tried in order: the pyxattr/xattr Python modules, NTFS
    Alternate Data Streams on Windows, then the setfattr/xattr command-line
    tools.  Raises XAttrUnavailableError when no usable backend exists and
    XAttrMetadataError when the chosen backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    # Build a fake birth date (e.g. for age-gate forms): maps the given form
    # field names to random year/month/day strings.
    # NOTE(review): the day is drawn from 1-31 regardless of month, so
    # calendar-invalid dates such as February 31 can be produced -- confirm
    # the consuming forms accept them.
    return {
        year_field: str(random.randint(1950, 1995)),
        month_field: str(random.randint(1, 12)),
        day_field: str(random.randint(1, 31)),