4 from __future__ 
import unicode_literals
 
  34 import xml
.etree
.ElementTree
 
  41     compat_etree_fromstring
, 
  43     compat_html_entities_html5
, 
  49     compat_socket_create_connection
, 
  55     compat_urllib_parse_urlencode
, 
  56     compat_urllib_parse_urlparse
, 
  57     compat_urllib_parse_unquote_plus
, 
  58     compat_urllib_request
, 
  69 def register_socks_protocols(): 
  70     # "Register" SOCKS protocols 
  71     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  72     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  73     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  74         if scheme 
not in compat_urlparse
.uses_netloc
: 
  75             compat_urlparse
.uses_netloc
.append(scheme
) 
  78 # This is not clearly defined otherwise 
  79 compiled_regex_type 
= type(re
.compile('')) 
  82     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 
  83     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  84     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  85     'Accept-Encoding': 'gzip, deflate', 
  86     'Accept-Language': 'en-us,en;q=0.5', 
  91     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
  97 ENGLISH_MONTH_NAMES 
= [ 
  98     'January', 'February', 'March', 'April', 'May', 'June', 
  99     'July', 'August', 'September', 'October', 'November', 'December'] 
 102     'en': ENGLISH_MONTH_NAMES
, 
 104         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
 105         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 109     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 110     'flv', 'f4v', 'f4a', 'f4b', 
 111     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 112     'mkv', 'mka', 'mk3d', 
 121     'f4f', 'f4m', 'm3u8', 'smil') 
 123 # needed for sanitizing filenames in restricted mode 
 124 ACCENT_CHARS 
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', 
 125                         itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], 
 126                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) 
 149     '%Y-%m-%d %H:%M:%S.%f', 
 152     '%Y-%m-%dT%H:%M:%SZ', 
 153     '%Y-%m-%dT%H:%M:%S.%fZ', 
 154     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 156     '%Y-%m-%dT%H:%M:%S.%f', 
 159     '%b %d %Y at %H:%M:%S', 
 162 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 163 DATE_FORMATS_DAY_FIRST
.extend([ 
 172 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 173 DATE_FORMATS_MONTH_FIRST
.extend([ 
 181 PACKED_CODES_RE 
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" 
 184 def preferredencoding(): 
 185     """Get preferred encoding. 
 187     Returns the best encoding scheme for the system, based on 
 188     locale.getpreferredencoding() and some further tweaks. 
 191         pref 
= locale
.getpreferredencoding() 
 199 def write_json_file(obj
, fn
): 
 200     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 202     fn 
= encodeFilename(fn
) 
 203     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 204         encoding 
= get_filesystem_encoding() 
 205         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 206         # will fail if the filename contains non ascii characters unless we 
 207         # use a unicode object 
 208         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 209         # the same for os.path.dirname 
 210         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 212         path_basename 
= os
.path
.basename
 
 213         path_dirname 
= os
.path
.dirname
 
 217         'prefix': path_basename(fn
) + '.', 
 218         'dir': path_dirname(fn
), 
 222     # In Python 2.x, json.dump expects a bytestream. 
 223     # In Python 3.x, it writes to a character stream 
 224     if sys
.version_info 
< (3, 0): 
 232     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 237         if sys
.platform 
== 'win32': 
 238             # Need to remove existing file on Windows, else os.rename raises 
 239             # WindowsError or FileExistsError. 
 244         os
.rename(tf
.name
, fn
) 
 253 if sys
.version_info 
>= (2, 7): 
 254     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 255         """ Find the xpath xpath[@key=val] """ 
 256         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 257         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 258         return node
.find(expr
) 
 260     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 261         for f 
in node
.findall(compat_xpath(xpath
)): 
 262             if key 
not in f
.attrib
: 
 264             if val 
is None or f
.attrib
.get(key
) == val
: 
 268 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 269 # the namespace parameter 
 272 def xpath_with_ns(path
, ns_map
): 
 273     components 
= [c
.split(':') for c 
in path
.split('/')] 
 277             replaced
.append(c
[0]) 
 280             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 281     return '/'.join(replaced
) 
 284 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 285     def _find_xpath(xpath
): 
 286         return node
.find(compat_xpath(xpath
)) 
 288     if isinstance(xpath
, (str, compat_str
)): 
 289         n 
= _find_xpath(xpath
) 
 297         if default 
is not NO_DEFAULT
: 
 300             name 
= xpath 
if name 
is None else name
 
 301             raise ExtractorError('Could not find XML element %s' % name
) 
 307 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 308     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 309     if n 
is None or n 
== default
: 
 312         if default 
is not NO_DEFAULT
: 
 315             name 
= xpath 
if name 
is None else name
 
 316             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 322 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 323     n 
= find_xpath_attr(node
, xpath
, key
) 
 325         if default 
is not NO_DEFAULT
: 
 328             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 329             raise ExtractorError('Could not find XML attribute %s' % name
) 
 335 def get_element_by_id(id, html
): 
 336     """Return the content of the tag with the specified ID in the passed HTML document""" 
 337     return get_element_by_attribute('id', id, html
) 
 340 def get_element_by_class(class_name
, html
): 
 341     """Return the content of the first tag with the specified class in the passed HTML document""" 
 342     retval 
= get_elements_by_class(class_name
, html
) 
 343     return retval
[0] if retval 
else None 
 346 def get_element_by_attribute(attribute
, value
, html
, escape_value
=True): 
 347     retval 
= get_elements_by_attribute(attribute
, value
, html
, escape_value
) 
 348     return retval
[0] if retval 
else None 
 351 def get_elements_by_class(class_name
, html
): 
 352     """Return the content of all tags with the specified class in the passed HTML document as a list""" 
 353     return get_elements_by_attribute( 
 354         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 355         html, escape_value=False) 
 358 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
 359     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 361     value = re.escape(value) if escape_value else value 
 364     for m in re.finditer(r'''(?xs) 
 366          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'))*?
 
 368          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'))*? 
 372     ''' % (re.escape(attribute), value), html): 
 373         res = m.group('content
') 
 375         if res.startswith('"') or res.startswith("'"): 
 378         retlist.append(unescapeHTML(res)) 
 383 class HTMLAttributeParser(compat_HTMLParser): 
 384     """Trivial HTML parser to gather the attributes for a single element""" 
 387         compat_HTMLParser.__init__(self) 
 389     def handle_starttag(self, tag, attrs): 
 390         self.attrs = dict(attrs) 
 393 def extract_attributes(html_element): 
 394     """Given a string for an HTML element such as 
 396          a="foo" B="bar" c="&98;az" d=boz 
 397          empty= noval entity="&" 
 400     Decode and return a dictionary of attributes. 
 402         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 403         'empty
': '', 'noval
': None, 'entity
': '&', 
 404         'sq
': '"', 'dq': '\'' 
 406     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 407     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 409     parser = HTMLAttributeParser() 
 410     parser.feed(html_element) 
 415 def clean_html(html): 
 416     """Clean an HTML snippet into a readable string""" 
 418     if html is None:  # Convenience for sanitizing descriptions etc. 
 422     html = html.replace('\n', ' ') 
 423     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 424     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 426     html = re.sub('<.*?>', '', html) 
 427     # Replace html entities 
 428     html = unescapeHTML(html) 
 432 def sanitize_open(filename, open_mode): 
 433     """Try to open the given filename, and slightly tweak it if this fails. 
 435     Attempts to open the given filename. If this fails, it tries to change 
 436     the filename slightly, step by step, until it's either able to open it 
 437     or it fails and raises a final exception, like the standard open() 
 440     It returns the tuple (stream, definitive_file_name). 
 444             if sys.platform == 'win32': 
 446                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 447             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 448         stream = open(encodeFilename(filename), open_mode) 
 449         return (stream, filename) 
 450     except (IOError, OSError) as err: 
 451         if err.errno in (errno.EACCES,): 
 454         # In case of error, try to remove win32 forbidden chars 
 455         alt_filename = sanitize_path(filename) 
 456         if alt_filename == filename: 
 459             # An exception here should be caught in the caller 
 460             stream = open(encodeFilename(alt_filename), open_mode) 
 461             return (stream, alt_filename) 
 464 def timeconvert(timestr): 
 465     """Convert RFC 2822 defined time string into system timestamp""" 
 467     timetuple = email.utils.parsedate_tz(timestr) 
 468     if timetuple is not None: 
 469         timestamp = email.utils.mktime_tz(timetuple) 
 473 def sanitize_filename(s, restricted=False, is_id=False): 
 474     """Sanitizes a string so it could be used as part of a filename. 
 475     If restricted is set, use a stricter subset of allowed characters. 
 476     Set is_id if this is not an arbitrary string, but an ID that should be kept 
 479     def replace_insane(char): 
 480         if restricted and char in ACCENT_CHARS: 
 481             return ACCENT_CHARS[char] 
 482         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 485             return '' if restricted else '\'' 
 487             return '_
-' if restricted else ' -' 
 488         elif char in '\\/|
*<>': 
 490         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 492         if restricted 
and ord(char
) > 127: 
 497     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 498     result 
= ''.join(map(replace_insane
, s
)) 
 500         while '__' in result
: 
 501             result 
= result
.replace('__', '_') 
 502         result 
= result
.strip('_') 
 503         # Common case of "Foreign band name - English song title" 
 504         if restricted 
and result
.startswith('-_'): 
 506         if result
.startswith('-'): 
 507             result 
= '_' + result
[len('-'):] 
 508         result 
= result
.lstrip('.') 
 514 def sanitize_path(s
): 
 515     """Sanitizes and normalizes path on Windows""" 
 516     if sys
.platform 
!= 'win32': 
 518     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 519     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 520         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 521     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 525         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
 526         for path_part 
in norm_path
] 
 528         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 529     return os
.path
.join(*sanitized_path
) 
 532 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of 
 533 # unwanted failures due to missing protocol 
 534 def sanitize_url(url
): 
 535     return 'http:%s' % url 
if url
.startswith('//') else url
 
 538 def sanitized_Request(url
, *args
, **kwargs
): 
 539     return compat_urllib_request
.Request(sanitize_url(url
), *args
, **kwargs
) 
 542 def orderedSet(iterable
): 
 543     """ Remove all duplicates from the input iterable """ 
 551 def _htmlentity_transform(entity_with_semicolon
): 
 552     """Transforms an HTML entity to a character.""" 
 553     entity 
= entity_with_semicolon
[:-1] 
 555     # Known non-numeric HTML entity 
 556     if entity 
in compat_html_entities
.name2codepoint
: 
 557         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 559     # TODO: HTML5 allows entities without a semicolon. For example, 
 560     # 'Éric' should be decoded as 'Éric'. 
 561     if entity_with_semicolon 
in compat_html_entities_html5
: 
 562         return compat_html_entities_html5
[entity_with_semicolon
] 
 564     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 566         numstr 
= mobj
.group(1) 
 567         if numstr
.startswith('x'): 
 569             numstr 
= '0%s' % numstr
 
 572         # See https://github.com/rg3/youtube-dl/issues/7518 
 574             return compat_chr(int(numstr
, base
)) 
 578     # Unknown entity in name, return its literal representation 
 579     return '&%s;' % entity
 
 585     assert type(s
) == compat_str
 
 588         r
'&([^;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 591 def get_subprocess_encoding(): 
 592     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 593         # For subprocess calls, encode with locale encoding 
 594         # Refer to http://stackoverflow.com/a/9951851/35070 
 595         encoding 
= preferredencoding() 
 597         encoding 
= sys
.getfilesystemencoding() 
 603 def encodeFilename(s
, for_subprocess
=False): 
 605     @param s The name of the file 
 608     assert type(s
) == compat_str
 
 610     # Python 3 has a Unicode API 
 611     if sys
.version_info 
>= (3, 0): 
 614     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 615     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 616     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 617     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 620     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 621     if sys
.platform
.startswith('java'): 
 624     return s
.encode(get_subprocess_encoding(), 'ignore') 
 627 def decodeFilename(b
, for_subprocess
=False): 
 629     if sys
.version_info 
>= (3, 0): 
 632     if not isinstance(b
, bytes): 
 635     return b
.decode(get_subprocess_encoding(), 'ignore') 
 638 def encodeArgument(s
): 
 639     if not isinstance(s
, compat_str
): 
 640         # Legacy code that uses byte strings 
 641         # Uncomment the following line after fixing all post processors 
 642         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 643         s 
= s
.decode('ascii') 
 644     return encodeFilename(s
, True) 
 647 def decodeArgument(b
): 
 648     return decodeFilename(b
, True) 
 651 def decodeOption(optval
): 
 654     if isinstance(optval
, bytes): 
 655         optval 
= optval
.decode(preferredencoding()) 
 657     assert isinstance(optval
, compat_str
) 
 661 def formatSeconds(secs
): 
 663         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 665         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 670 def make_HTTPS_handler(params
, **kwargs
): 
 671     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 672     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 673         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 674         if opts_no_check_certificate
: 
 675             context
.check_hostname 
= False 
 676             context
.verify_mode 
= ssl
.CERT_NONE
 
 678             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 681             # (create_default_context present but HTTPSHandler has no context=) 
 684     if sys
.version_info 
< (3, 2): 
 685         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 687         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 688         context
.verify_mode 
= (ssl
.CERT_NONE
 
 689                                if opts_no_check_certificate
 
 690                                else ssl
.CERT_REQUIRED
) 
 691         context
.set_default_verify_paths() 
 692         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 695 def bug_reports_message(): 
 696     if ytdl_is_updateable(): 
 697         update_cmd 
= 'type  youtube-dl -U  to update' 
 699         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 700     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 701     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 702     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
 706 class YoutubeDLError(Exception): 
 707     """Base exception for YoutubeDL errors.""" 
 711 class ExtractorError(YoutubeDLError
): 
 712     """Error during info extraction.""" 
 714     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 715         """ tb, if given, is the original traceback (so that it can be printed out). 
 716         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 719         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 721         if video_id 
is not None: 
 722             msg 
= video_id 
+ ': ' + msg
 
 724             msg 
+= ' (caused by %r)' % cause
 
 726             msg 
+= bug_reports_message() 
 727         super(ExtractorError
, self
).__init
__(msg
) 
 730         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 732         self
.video_id 
= video_id
 
 734     def format_traceback(self
): 
 735         if self
.traceback 
is None: 
 737         return ''.join(traceback
.format_tb(self
.traceback
)) 
 740 class UnsupportedError(ExtractorError
): 
 741     def __init__(self
, url
): 
 742         super(UnsupportedError
, self
).__init
__( 
 743             'Unsupported URL: %s' % url
, expected
=True) 
 747 class RegexNotFoundError(ExtractorError
): 
 748     """Error when a regex didn't match""" 
 752 class GeoRestrictedError(ExtractorError
): 
 753     """Geographic restriction Error exception. 
 755     This exception may be thrown when a video is not available from your 
 756     geographic location due to geographic restrictions imposed by a website. 
 758     def __init__(self
, msg
, countries
=None): 
 759         super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True) 
 761         self
.countries 
= countries
 
 764 class DownloadError(YoutubeDLError
): 
 765     """Download Error exception. 
 767     This exception may be thrown by FileDownloader objects if they are not 
 768     configured to continue on errors. They will contain the appropriate 
 772     def __init__(self
, msg
, exc_info
=None): 
 773         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ 
 774         super(DownloadError
, self
).__init
__(msg
) 
 775         self
.exc_info 
= exc_info
 
 778 class SameFileError(YoutubeDLError
): 
 779     """Same File exception. 
 781     This exception will be thrown by FileDownloader objects if they detect 
 782     multiple files would have to be downloaded to the same file on disk. 
 787 class PostProcessingError(YoutubeDLError
): 
 788     """Post Processing exception. 
 790     This exception may be raised by PostProcessor's .run() method to 
 791     indicate an error in the postprocessing task. 
 794     def __init__(self
, msg
): 
 795         super(PostProcessingError
, self
).__init
__(msg
) 
 799 class MaxDownloadsReached(YoutubeDLError
): 
 800     """ --max-downloads limit has been reached. """ 
 804 class UnavailableVideoError(YoutubeDLError
): 
 805     """Unavailable Format exception. 
 807     This exception will be thrown when a video is requested 
 808     in a format that is not available for that video. 
 813 class ContentTooShortError(YoutubeDLError
): 
 814     """Content Too Short exception. 
 816     This exception may be raised by FileDownloader objects when a file they 
 817     download is too small for what the server announced first, indicating 
 818     the connection was probably interrupted. 
 821     def __init__(self
, downloaded
, expected
): 
 822         super(ContentTooShortError
, self
).__init
__( 
 823             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
) 
 826         self
.downloaded 
= downloaded
 
 827         self
.expected 
= expected
 
 830 class XAttrMetadataError(YoutubeDLError
): 
 831     def __init__(self
, code
=None, msg
='Unknown error'): 
 832         super(XAttrMetadataError
, self
).__init
__(msg
) 
 836         # Parsing code and msg 
 837         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 838                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 839             self
.reason 
= 'NO_SPACE' 
 840         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 841             self
.reason 
= 'VALUE_TOO_LONG' 
 843             self
.reason 
= 'NOT_SUPPORTED' 
 846 class XAttrUnavailableError(YoutubeDLError
): 
 850 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 851     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 852     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 853     # https://github.com/rg3/youtube-dl/issues/6727) 
 854     if sys
.version_info 
< (3, 0): 
 855         kwargs
[b
'strict'] = True 
 856     hc 
= http_class(*args
, **kwargs
) 
 857     source_address 
= ydl_handler
._params
.get('source_address') 
 858     if source_address 
is not None: 
 859         sa 
= (source_address
, 0) 
 860         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 861             hc
.source_address 
= sa
 
 863             def _hc_connect(self
, *args
, **kwargs
): 
 864                 sock 
= compat_socket_create_connection( 
 865                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 867                     self
.sock 
= ssl
.wrap_socket( 
 868                         sock
, self
.key_file
, self
.cert_file
, 
 869                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 872             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 877 def handle_youtubedl_headers(headers
): 
 878     filtered_headers 
= headers
 
 880     if 'Youtubedl-no-compression' in filtered_headers
: 
 881         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 882         del filtered_headers
['Youtubedl-no-compression'] 
 884     return filtered_headers
 
 887 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 888     """Handler for HTTP requests and responses. 
 890     This class, when installed with an OpenerDirector, automatically adds 
 891     the standard headers to every HTTP request and handles gzipped and 
 892     deflated responses from web servers. If compression is to be avoided in 
 893     a particular request, the original request in the program code only has 
 894     to include the HTTP header "Youtubedl-no-compression", which will be 
 895     removed before making the real request. 
 897     Part of this code was copied from: 
 899     http://techknack.net/python-urllib2-handlers/ 
 901     Andrew Rowls, the author of that code, agreed to release it to the 
 905     def __init__(self
, params
, *args
, **kwargs
): 
 906         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 907         self
._params 
= params
 
 909     def http_open(self
, req
): 
 910         conn_class 
= compat_http_client
.HTTPConnection
 
 912         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 914             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 915             del req
.headers
['Ytdl-socks-proxy'] 
 917         return self
.do_open(functools
.partial( 
 918             _create_http_connection
, self
, conn_class
, False), 
 924             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 926             return zlib
.decompress(data
) 
 929     def addinfourl_wrapper(stream
, headers
, url
, code
): 
 930         if hasattr(compat_urllib_request
.addinfourl
, 'getcode'): 
 931             return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
) 
 932         ret 
= compat_urllib_request
.addinfourl(stream
, headers
, url
) 
 936     def http_request(self
, req
): 
 937         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 938         # always respected by websites, some tend to give out URLs with non percent-encoded 
 939         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 940         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 941         # To work around aforementioned issue we will replace request's original URL with 
 942         # percent-encoded one 
 943         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 944         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 945         url 
= req
.get_full_url() 
 946         url_escaped 
= escape_url(url
) 
 948         # Substitute URL if any change after escaping 
 949         if url 
!= url_escaped
: 
 950             req 
= update_Request(req
, url
=url_escaped
) 
 952         for h
, v 
in std_headers
.items(): 
 953             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 954             # The dict keys are capitalized because of this bug by urllib 
 955             if h
.capitalize() not in req
.headers
: 
 958         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 960         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 961             # Python 2.6 is brain-dead when it comes to fragments 
 962             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 963             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 967     def http_response(self
, req
, resp
): 
 970         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 971             content 
= resp
.read() 
 972             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 974                 uncompressed 
= io
.BytesIO(gz
.read()) 
 975             except IOError as original_ioerror
: 
 976                 # There may be junk add the end of the file 
 977                 # See http://stackoverflow.com/q/4928560/35070 for details 
 978                 for i 
in range(1, 1024): 
 980                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 981                         uncompressed 
= io
.BytesIO(gz
.read()) 
 986                     raise original_ioerror
 
 987             resp 
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 988             resp
.msg 
= old_resp
.msg
 
 989             del resp
.headers
['Content-encoding'] 
 991         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 992             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
 993             resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 994             resp
.msg 
= old_resp
.msg
 
 995             del resp
.headers
['Content-encoding'] 
 996         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
 997         # https://github.com/rg3/youtube-dl/issues/6457). 
 998         if 300 <= resp
.code 
< 400: 
 999             location 
= resp
.headers
.get('Location') 
1001                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
1002                 if sys
.version_info 
>= (3, 0): 
1003                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
1005                     location 
= location
.decode('utf-8') 
1006                 location_escaped 
= escape_url(location
) 
1007                 if location 
!= location_escaped
: 
1008                     del resp
.headers
['Location'] 
1009                     if sys
.version_info 
< (3, 0): 
1010                         location_escaped 
= location_escaped
.encode('utf-8') 
1011                     resp
.headers
['Location'] = location_escaped
 
1014     https_request 
= http_request
 
1015     https_response 
= http_response
 
1018 def make_socks_conn_class(base_class
, socks_proxy
): 
1019     assert issubclass(base_class
, ( 
1020         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
1022     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
1023     if url_components
.scheme
.lower() == 'socks5': 
1024         socks_type 
= ProxyType
.SOCKS5
 
1025     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
1026         socks_type 
= ProxyType
.SOCKS4
 
1027     elif url_components
.scheme
.lower() == 'socks4a': 
1028         socks_type 
= ProxyType
.SOCKS4A
 
1030     def unquote_if_non_empty(s
): 
1033         return compat_urllib_parse_unquote_plus(s
) 
1037         url_components
.hostname
, url_components
.port 
or 1080, 
1039         unquote_if_non_empty(url_components
.username
), 
1040         unquote_if_non_empty(url_components
.password
), 
1043     class SocksConnection(base_class
): 
1045             self
.sock 
= sockssocket() 
1046             self
.sock
.setproxy(*proxy_args
) 
1047             if type(self
.timeout
) in (int, float): 
1048                 self
.sock
.settimeout(self
.timeout
) 
1049             self
.sock
.connect((self
.host
, self
.port
)) 
1051             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1052                 if hasattr(self
, '_context'):  # Python > 2.6 
1053                     self
.sock 
= self
._context
.wrap_socket( 
1054                         self
.sock
, server_hostname
=self
.host
) 
1056                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1058     return SocksConnection
 
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPSHandler that honors a per-request 'Ytdl-socks-proxy' header and a
    configurable HTTPS connection class."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # Internal header only; must not leak onto the wire.
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
 
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timezone, date_str): timezone is a datetime.timedelta (zero for
    'Z' or when no designator is found) and date_str has the designator
    stripped when one was matched.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            # Bare 'Z' means UTC: no offset.
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
 
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Drop fractional seconds: the strptime format below has no slot for them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        # Unparseable date: fall through and return None.
        pass
def date_formats(day_first=True):
    """Select the strptime format list matching the expected day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
 
1151 def unified_strdate(date_str
, day_first
=True): 
1152     """Return a string with the date in the format YYYYMMDD""" 
1154     if date_str 
is None: 
1158     date_str 
= date_str
.replace(',', ' ') 
1159     # Remove AM/PM + timezone 
1160     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1161     _
, date_str 
= extract_timezone(date_str
) 
1163     for expression 
in date_formats(day_first
): 
1165             upload_date 
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d') 
1168     if upload_date 
is None: 
1169         timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1172                 upload_date 
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d') 
1175     if upload_date 
is not None: 
1176         return compat_str(upload_date
) 
1179 def unified_timestamp(date_str
, day_first
=True): 
1180     if date_str 
is None: 
1183     date_str 
= date_str
.replace(',', ' ') 
1185     pm_delta 
= 12 if re
.search(r
'(?i)PM', date_str
) else 0 
1186     timezone
, date_str 
= extract_timezone(date_str
) 
1188     # Remove AM/PM + timezone 
1189     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1191     for expression 
in date_formats(day_first
): 
1193             dt 
= datetime
.datetime
.strptime(date_str
, expression
) - timezone 
+ datetime
.timedelta(hours
=pm_delta
) 
1194             return calendar
.timegm(dt
.timetuple()) 
1197     timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1199         return calendar
.timegm(timetuple
) + pm_delta 
* 3600 
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive the subtitle file name: media extension becomes '<lang>.<format>'."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
 
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    m = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if m is not None:
        amount = int(m.group('time'))
        if m.group('sign') == '-':
            amount = -amount
        unit = m.group('unit')
        # A bad approximation? months/years become fixed day counts.
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        unit += 's'  # timedelta keywords are plural
        return today + datetime.timedelta(**{unit: amount})
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format; any other
    string is returned unchanged."""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        return date_str
    return '-'.join(m.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        # Python 2 may hand back bytes; decode with the locale's encoding.
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
1299 def _windows_write_string(s
, out
): 
1300     """ Returns True if the string was written using special methods, 
1301     False if it has yet to be written out.""" 
1302     # Adapted from http://stackoverflow.com/a/3259271/35070 
1305     import ctypes
.wintypes
 
1313         fileno 
= out
.fileno() 
1314     except AttributeError: 
1315         # If the output stream doesn't have a fileno, it's virtual 
1317     except io
.UnsupportedOperation
: 
1318         # Some strange Windows pseudo files? 
1320     if fileno 
not in WIN_OUTPUT_IDS
: 
1323     GetStdHandle 
= ctypes
.WINFUNCTYPE( 
1324         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1325         (b
'GetStdHandle', ctypes
.windll
.kernel32
)) 
1326     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1328     WriteConsoleW 
= ctypes
.WINFUNCTYPE( 
1329         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1330         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1331         ctypes
.wintypes
.LPVOID
)((b
'WriteConsoleW', ctypes
.windll
.kernel32
)) 
1332     written 
= ctypes
.wintypes
.DWORD(0) 
1334     GetFileType 
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
'GetFileType', ctypes
.windll
.kernel32
)) 
1335     FILE_TYPE_CHAR 
= 0x0002 
1336     FILE_TYPE_REMOTE 
= 0x8000 
1337     GetConsoleMode 
= ctypes
.WINFUNCTYPE( 
1338         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1339         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1340         (b
'GetConsoleMode', ctypes
.windll
.kernel32
)) 
1341     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1343     def not_a_console(handle
): 
1344         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1346         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1347                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1349     if not_a_console(h
): 
1352     def next_nonbmp_pos(s
): 
1354             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1355         except StopIteration: 
1359         count 
= min(next_nonbmp_pos(s
), 1024) 
1361         ret 
= WriteConsoleW( 
1362             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1364             raise OSError('Failed to write string') 
1365         if not count
:  # We just wrote a non-BMP character 
1366             assert written
.value 
== 2 
1369             assert written
.value 
> 0 
1370             s 
= s
[written
.value
:] 
def write_string(s, out=None, encoding=None):
    """Write text *s* to stream *out* (default sys.stderr), coping with
    Windows consoles, byte streams and Python 2 quirks."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)

    out.flush()
def bytes_to_intlist(bs):
    """Turn a byte sequence into a list of per-byte integer values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a list of byte values (0-255) back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
1411 # Cross-platform file locking 
1412 if sys
.platform 
== 'win32': 
1413     import ctypes
.wintypes
 
1416     class OVERLAPPED(ctypes
.Structure
): 
1418             ('Internal', ctypes
.wintypes
.LPVOID
), 
1419             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1420             ('Offset', ctypes
.wintypes
.DWORD
), 
1421             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1422             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1425     kernel32 
= ctypes
.windll
.kernel32
 
1426     LockFileEx 
= kernel32
.LockFileEx
 
1427     LockFileEx
.argtypes 
= [ 
1428         ctypes
.wintypes
.HANDLE
,     # hFile 
1429         ctypes
.wintypes
.DWORD
,      # dwFlags 
1430         ctypes
.wintypes
.DWORD
,      # dwReserved 
1431         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1432         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1433         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1435     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1436     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1437     UnlockFileEx
.argtypes 
= [ 
1438         ctypes
.wintypes
.HANDLE
,     # hFile 
1439         ctypes
.wintypes
.DWORD
,      # dwReserved 
1440         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1441         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1442         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1444     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1445     whole_low 
= 0xffffffff 
1446     whole_high 
= 0x7fffffff 
1448     def _lock_file(f
, exclusive
): 
1449         overlapped 
= OVERLAPPED() 
1450         overlapped
.Offset 
= 0 
1451         overlapped
.OffsetHigh 
= 0 
1452         overlapped
.hEvent 
= 0 
1453         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1454         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1455         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1456                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1457             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1459     def _unlock_file(f
): 
1460         assert f
._lock
_file
_overlapped
_p
 
1461         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1462         if not UnlockFileEx(handle
, 0, 
1463                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1464             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1467     # Some platforms, such as Jython, is missing fcntl 
1471         def _lock_file(f
, exclusive
): 
1472             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1474         def _unlock_file(f
): 
1475             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1477         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1479         def _lock_file(f
, exclusive
): 
1480             raise IOError(UNSUPPORTED_MSG
) 
1482         def _unlock_file(f
): 
1483             raise IOError(UNSUPPORTED_MSG
) 
class locked_file(object):
    """Context-manager file wrapper that holds an OS-level lock while open
    (exclusive for write/append modes, shared for read)."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to 'utf-8' when unknown."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
def shell_quote(args):
    """Quote each argument and join them into a shell-safe command string."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL.
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Extract data previously attached by smuggle_url(); returns (url, data)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. 1024 -> '1.00KiB'.

    Accepts ints, floats or numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    # Parameter name kept for interface compatibility; use a local from here on.
    num = float(bytes) if type(bytes) is str else bytes
    if num == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(num, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # Clamp so absurd magnitudes can't index outside the suffix table
    # (previously >= 1024**9 raised IndexError, sub-byte values wrapped around).
    exponent = min(max(exponent, 0), len(suffixes) - 1)
    converted = float(num) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
def lookup_unit_table(unit_table, s):
    """Find '<number> <unit>' in *s* and return number * unit_table[unit].

    Decimal commas are accepted; returns None when nothing matches.
    """
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.search(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    num_str = m.group('num').replace(',', '.')
    mult = unit_table[m.group('unit')]
    return int(float(num_str) * mult)
1577 def parse_filesize(s
): 
1581     # The lower-case forms are of course incorrect and unofficial, 
1582     # but we support those too 
1599         'megabytes': 1000 ** 2, 
1600         'mebibytes': 1024 ** 2, 
1606         'gigabytes': 1000 ** 3, 
1607         'gibibytes': 1024 ** 3, 
1613         'terabytes': 1000 ** 4, 
1614         'tebibytes': 1024 ** 4, 
1620         'petabytes': 1000 ** 5, 
1621         'pebibytes': 1024 ** 5, 
1627         'exabytes': 1000 ** 6, 
1628         'exbibytes': 1024 ** 6, 
1634         'zettabytes': 1000 ** 7, 
1635         'zebibytes': 1024 ** 7, 
1641         'yottabytes': 1000 ** 8, 
1642         'yobibytes': 1024 ** 8, 
1645     return lookup_unit_table(_UNIT_TABLE
, s
) 
1654     if re
.match(r
'^[\d,.]+$', s
): 
1655         return str_to_int(s
) 
1666     return lookup_unit_table(_UNIT_TABLE
, s
) 
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviation """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace stray '&' (not already part of an entity or numeric
    character reference) with '&amp;' in XML."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
1698 def setproctitle(title
): 
1699     assert isinstance(title
, compat_str
) 
1701     # ctypes in Jython is not complete 
1702     # http://bugs.jython.org/issue2148 
1703     if sys
.platform
.startswith('java'): 
1707         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
1711         # LoadLibrary in Windows Python 2.7.13 only expects 
1712         # a bytestring, but since unicode_literals turns 
1713         # every string into a unicode string, it fails. 
1715     title_bytes 
= title
.encode('utf-8') 
1716     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1717     buf
.value 
= title_bytes
 
1719         libc
.prctl(15, buf
, 0, 0, 0) 
1720     except AttributeError: 
1721         return  # Strange libc, just skip this 
def remove_start(s, start):
    """Strip prefix *start* from *s* when present; None passes through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip suffix *end* from *s* when present; None passes through."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def url_basename(url):
    """Return the final path segment of *url* (query/fragment excluded)."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip('/').split('/')
    return segments[-1]
def base_url(url):
    """Return everything up to and including the last '/' of the URL path.

    NOTE(review): raises AttributeError for URLs with no path slash — callers
    appear to pass well-formed media URLs; confirm before hardening.
    """
    return re.match(r'https?://[^?#&]+/', url).group()
def urljoin(base, path):
    """Join *base* and *path* like urllib, but return None for unusable input
    and pass absolute (or protocol-relative) paths through untouched."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:https?:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
class HEADRequest(compat_urllib_request.Request):
    """A Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    """A Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int scaled by invscale/scale; *default* on failure.

    When *get_attr* is given, that attribute is read off *v* first.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Coerce *v* to compat_str; None maps to *default*."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Strip thousands separators and stray '+' before parsing.
    int_str = re.sub(r'[,\.\+]', '', int_str)
    return int(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float scaled by invscale/scale; *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def strip_or_none(v):
    """Return v.strip(), or None when v is None."""
    if v is None:
        return None
    return v.strip()
1814 def parse_duration(s
): 
1815     if not isinstance(s
, compat_basestring
): 
1820     days
, hours
, mins
, secs
, ms 
= [None] * 5 
1821     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
) 
1823         days
, hours
, mins
, secs
, ms 
= m
.groups() 
1828                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
1831                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
1834                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
1837                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
1840             days
, hours
, mins
, secs
, ms 
= m
.groups() 
1842             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
) 
1844                 hours
, mins 
= m
.groups() 
1850         duration 
+= float(secs
) 
1852         duration 
+= float(mins
) * 60 
1854         duration 
+= float(hours
) * 60 * 60 
1856         duration 
+= float(days
) * 24 * 60 * 60 
1858         duration 
+= float(ms
) 
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    If *expected_real_ext* is given but doesn't match the actual extension,
    *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the extension of *filename* for *ext*.

    If *expected_real_ext* is given but doesn't match the actual extension,
    *ext* is appended to the full filename instead of replacing anything.
    """
    name, real_ext = os.path.splitext(filename)
    keep_whole = expected_real_ext and real_ext[1:] != expected_real_ext
    return '{0}.{1}'.format(filename if keep_whole else name, ext)
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # None sentinel instead of a mutable [] default (shared-state footgun).
    if args is None:
        args = []
    try:
        subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
1887 def get_exe_version(exe
, args
=['--version'], 
1888                     version_re
=None, unrecognized
='present'): 
1889     """ Returns the version of the specified executable, 
1890     or False if the executable is not present """ 
1892         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers 
1893         # SIGTTOU if youtube-dl is run in the background. 
1894         # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656 
1895         out
, _ 
= subprocess
.Popen( 
1896             [encodeArgument(exe
)] + args
, 
1897             stdin
=subprocess
.PIPE
, 
1898             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1901     if isinstance(out
, bytes):  # Python 2.x 
1902         out 
= out
.decode('ascii', 'ignore') 
1903     return detect_exe_version(out
, version_re
, unrecognized
) 
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Pull a version string out of --version style *output*; return
    *unrecognized* when none is found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    return unrecognized
class PagedList(object):
    """Base class for lazily-paged result lists; subclasses implement getslice()."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
1923 class OnDemandPagedList(PagedList
): 
1924     def __init__(self
, pagefunc
, pagesize
, use_cache
=False): 
1925         self
._pagefunc 
= pagefunc
 
1926         self
._pagesize 
= pagesize
 
1927         self
._use
_cache 
= use_cache
 
1931     def getslice(self
, start
=0, end
=None): 
1933         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
1934             firstid 
= pagenum 
* self
._pagesize
 
1935             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
1936             if start 
>= nextfirstid
: 
1941                 page_results 
= self
._cache
.get(pagenum
) 
1942             if page_results 
is None: 
1943                 page_results 
= list(self
._pagefunc
(pagenum
)) 
1945                 self
._cache
[pagenum
] = page_results
 
1948                 start 
% self
._pagesize
 
1949                 if firstid 
<= start 
< nextfirstid
 
1953                 ((end 
- 1) % self
._pagesize
) + 1 
1954                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
1957             if startv 
!= 0 or endv 
is not None: 
1958                 page_results 
= page_results
[startv
:endv
] 
1959             res
.extend(page_results
) 
1961             # A little optimization - if current page is not "full", ie. does 
1962             # not contain page_size videos then we can assume that this page 
1963             # is the last one - there are no more ids on further pages - 
1964             # i.e. no need to query again. 
1965             if len(page_results
) + startv 
< self
._pagesize
: 
1968             # If we got the whole page, but the next page is not interesting, 
1969             # break out early as well 
1970             if end 
== nextfirstid
: 
1975 class InAdvancePagedList(PagedList
): 
1976     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
1977         self
._pagefunc 
= pagefunc
 
1978         self
._pagecount 
= pagecount
 
1979         self
._pagesize 
= pagesize
 
1981     def getslice(self
, start
=0, end
=None): 
1983         start_page 
= start 
// self
._pagesize
 
1985             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
1986         skip_elems 
= start 
- start_page 
* self
._pagesize
 
1987         only_more 
= None if end 
is None else end 
- start
 
1988         for pagenum 
in range(start_page
, end_page
): 
1989             page 
= list(self
._pagefunc
(pagenum
)) 
1991                 page 
= page
[skip_elems
:] 
1993             if only_more 
is not None: 
1994                 if len(page
) < only_more
: 
1995                     only_more 
-= len(page
) 
1997                     page 
= page
[:only_more
] 
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() requires a byte string for non-ASCII input.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # Host goes through IDNA; every other component is percent-escaped.
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
2039 def read_batch_urls(batch_fd
): 
2041         if not isinstance(url
, compat_str
): 
2042             url 
= url
.decode('utf-8', 'replace') 
2043         BOM_UTF8 
= '\xef\xbb\xbf' 
2044         if url
.startswith(BOM_UTF8
): 
2045             url 
= url
[len(BOM_UTF8
):] 
2047         if url
.startswith(('#', ';', ']')): 
2051     with contextlib
.closing(batch_fd
) as fd
: 
2052         return [url 
for url 
in map(fixup
, fd
) if url
] 
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url_query(url, query):
    """Merge *query* parameters into *url*'s query string and rebuild the URL."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding url, data, headers and query params.

    The clone's request class (plain/HEAD/PUT) matches the original method.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key — or the first acceptable of several — in dict *d*.

    With a list/tuple of keys, entries that are missing, None, or falsy
    (when skip_false_values is set) are passed over.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
def try_get(src, getter, expected_type=None):
    """Apply *getter* to *src*, swallowing common lookup errors.

    Returns the result only if it matches *expected_type* (when given),
    else None.
    """
    try:
        v = getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None
    if expected_type is None or isinstance(v, expected_type):
        return v
    return None
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Coerce *string* to compat_str, decoding bytes with *encoding* if needed."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2122 TV_PARENTAL_GUIDELINES 
= { 
def parse_age_limit(s):
    """Normalize an age limit (int, 'NN+', US rating or TV guideline) to an int
    in 0..21, or None when unrecognized."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    return TV_PARENTAL_GUIDELINES.get(s)
def strip_jsonp(code):
    """Strip a JSONP wrapper (callback name, parens, trailing ';'/comments)."""
    return re.sub(
        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$',
        r'\1', code)
2150 def js_to_json(code
): 
2151     COMMENT_RE 
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*' 
2152     SKIP_RE 
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
) 
2154         (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16), 
2155         (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8), 
2160         if v 
in ('true', 'false', 'null'): 
2162         elif v
.startswith('/*') or v
.startswith('//') or v 
== ',': 
2165         if v
[0] in ("'", '"'): 
2166             v 
= re
.sub(r
'(?s)\\.|"', lambda m
: { 
2171             }.get(m
.group(0), m
.group(0)), v
[1:-1]) 
2173         for regex
, base 
in INTEGER_TABLE
: 
2174             im 
= re
.match(regex
, v
) 
2176                 i 
= int(im
.group(1), base
) 
2177                 return '"%d":' % i 
if v
.endswith(':') else '%d' % i
 
2181     return re
.sub(r
'''(?sx) 
2182         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| 
2183         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| 
2184         {comment}|,(?={skip}[\]}}])| 
2185         [a-zA-Z_][.a-zA-Z_0-9]*| 
2186         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| 
2188         '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
) 
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown quality ranks below everything known.
            return -1
    return q
2201 DEFAULT_OUTTMPL 
= '%(title)s-%(id)s.%(ext)s' 
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Parse a dotted/dashed version string into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; unparsable input yields *assume_new*'s
    inverse (i.e. "not outdated" when assume_new is True)."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Self-update works from a zip bundle or a frozen (py2exe-style) build.
    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = (compat_shlex_quote(arg) for arg in args)
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Return the message of *err* as a text (unicode) string."""
    msg = str(err)
    if sys.version_info[0] < 3:
        # On python 2 error byte string must be decoded with proper
        # encoding rather than ascii
        msg = msg.decode(preferredencoding())
    return msg
2248 def mimetype2ext(mt
): 
2254         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as 
2255         # it's the most popular one 
2256         'audio/mpeg': 'mp3', 
2261     _
, _
, res 
= mt
.rpartition('/') 
2262     res 
= res
.split(';')[0].strip().lower() 
2266         'smptett+xml': 'tt', 
2272         'x-mp4-fragmented': 'mp4', 
2275         'x-mpegurl': 'm3u8', 
2276         'vnd.apple.mpegurl': 'm3u8', 
2281         'vnd.ms-sstr+xml': 'ism', 
2286 def parse_codecs(codecs_str
): 
2287     # http://tools.ietf.org/html/rfc6381 
2290     splited_codecs 
= list(filter(None, map( 
2291         lambda str: str.strip(), codecs_str
.strip().strip(',').split(',')))) 
2292     vcodec
, acodec 
= None, None 
2293     for full_codec 
in splited_codecs
: 
2294         codec 
= full_codec
.split('.')[0] 
2295         if codec 
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): 
2298         elif codec 
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'): 
2302             write_string('WARNING: Unknown codec %s' % full_codec
, sys
.stderr
) 
2303     if not vcodec 
and not acodec
: 
2304         if len(splited_codecs
) == 2: 
2309         elif len(splited_codecs
) == 1: 
2316             'vcodec': vcodec 
or 'none', 
2317             'acodec': acodec 
or 'none', 
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a urllib response's headers."""
    getheader = url_handle.headers.get

    # Prefer the filename advertised in Content-Disposition, if present.
    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    # Fall back to mapping the MIME type of the response.
    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI embedding *data* as base64."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """Return True iff content rated *content_limit* must be blocked for a
    viewer restricted to *age_limit*."""
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone.
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Recognized byte-order marks and the encoding each implies.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if text is None:
        # No BOM: assume UTF-8, replacing undecodable bytes.
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
def determine_protocol(info_dict):
    """Best-effort guess of the download protocol for *info_dict*."""
    # An explicitly provided protocol always wins.
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Scheme prefixes that directly identify streaming protocols.
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    # Otherwise infer from the manifest extension (HLS/HDS).
    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    # Fall back to the URL scheme (http, https, ftp, ...).
    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """Render rows (each a list of values) as an aligned plain-text table."""
    rows = [header_row] + data
    # Per-column width: the widest cell, rendered as text.
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]
    # Left-align every column; pad all but the last one by width + 1.
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
def _match_one(filter_part, dct):
    """Evaluate a single filter expression (e.g. 'duration > 60') against *dct*."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key OP value, where value is an int (with optional SI/filesize suffix),
    # a quoted string, or a bare string; '?' after the operator makes a
    # missing key pass the filter.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/rg3/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            # String comparisons support equality operators only.
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the kind that delimited the value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try parsing as a filesize,
                # with and without an implicit 'B' suffix.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing key passes only when the '?' marker was present.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    # Presence test: 'key' (must exist) or '!key' (must not exist).
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # '&' joins sub-filters; every one of them must hold.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None to keep a video,
    or a human-readable skip message when it fails *filter_str*."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float); None if unparsable."""
    if not time_expr:
        return

    # Bare seconds offset, optionally suffixed with 's'.
    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    # Clock time HH:MM:SS with an optional subsecond part; a ':' before
    # the subseconds is read as a decimal point.
    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes = int(clock_match.group(1)), int(clock_match.group(2))
        seconds = float(clock_match.group(3).replace(':', '.'))
        return 3600 * hours + 60 * minutes + seconds
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode (HH:MM:SS,mmm)."""
    hours = int(seconds / 3600)
    minutes = int((seconds % 3600) / 60)
    secs = int(seconds % 60)
    millis = int((seconds % 1) * 1000)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
def dfxp2srt(dfxp_data):
    """Convert DFXP/TTML subtitle markup into SRT text."""
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
        'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
    })

    class TTMLPElementParser(object):
        # Accumulates the plain text of one <p> element.
        out = ''

        def start(self, tag, attrib):
            # <br/> in any supported namespace becomes a newline.
            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                self.out += '\n'

        def end(self, tag):
            pass

        def data(self, data):
            self.out += data

        def close(self):
            return self.out.strip()

    def parse_node(node):
        # Re-feed the subtree through a target parser to flatten it to text.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    out = []
    # <p> elements may live in any of the known TTML namespaces, or none.
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            # No explicit end time: derive it from the duration.
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param):
    """Render params[param] as [command_option, value], or [] when unset."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as CLI args, joined by *separator* if given."""
    flag = params.get(param)
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=[]):
    """Return the extra-args list stored under *param*, or *default* when unset."""
    extra = params.get(param)
    if extra is None:
        return default
    # Configuration args are expected to already be a list of strings.
    assert isinstance(extra, list)
    return extra
2587 class ISO639Utils(object): 
2588     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two letters form the ISO 639-1 code.
        return cls._lang_map.get(code[:2])
2782     def long2short(cls, code): 
2783         """Convert language code from ISO 639-2/T to ISO 639-1""" 
2784         for short_name, long_name in cls._lang_map.items(): 
2785             if long_name == code: 
2789 class ISO3166Utils(object): 
2790     # From http://data.okfn.org/data/core/country-list 
2792         'AF': 'Afghanistan', 
2793         'AX': 'Åland Islands', 
2796         'AS': 'American Samoa', 
2801         'AG': 'Antigua and Barbuda', 
2818         'BO': 'Bolivia, Plurinational State of', 
2819         'BQ': 'Bonaire, Sint Eustatius and Saba', 
2820         'BA': 'Bosnia and Herzegovina', 
2822         'BV': 'Bouvet Island', 
2824         'IO': 'British Indian Ocean Territory', 
2825         'BN': 'Brunei Darussalam', 
2827         'BF': 'Burkina Faso', 
2833         'KY': 'Cayman Islands', 
2834         'CF': 'Central African Republic', 
2838         'CX': 'Christmas Island', 
2839         'CC': 'Cocos (Keeling) Islands', 
2843         'CD': 'Congo, the Democratic Republic of the', 
2844         'CK': 'Cook Islands', 
2846         'CI': 'Côte d\'Ivoire', 
2851         'CZ': 'Czech Republic', 
2855         'DO': 'Dominican Republic', 
2858         'SV': 'El Salvador', 
2859         'GQ': 'Equatorial Guinea', 
2863         'FK': 'Falkland Islands (Malvinas)', 
2864         'FO': 'Faroe Islands', 
2868         'GF': 'French Guiana', 
2869         'PF': 'French Polynesia', 
2870         'TF': 'French Southern Territories', 
2885         'GW': 'Guinea-Bissau', 
2888         'HM': 'Heard Island and McDonald Islands', 
2889         'VA': 'Holy See (Vatican City State)', 
2896         'IR': 'Iran, Islamic Republic of', 
2899         'IM': 'Isle of Man', 
2909         'KP': 'Korea, Democratic People\'s Republic of', 
2910         'KR': 'Korea, Republic of', 
2913         'LA': 'Lao People\'s Democratic Republic', 
2919         'LI': 'Liechtenstein', 
2923         'MK': 'Macedonia, the Former Yugoslav Republic of', 
2930         'MH': 'Marshall Islands', 
2936         'FM': 'Micronesia, Federated States of', 
2937         'MD': 'Moldova, Republic of', 
2948         'NL': 'Netherlands', 
2949         'NC': 'New Caledonia', 
2950         'NZ': 'New Zealand', 
2955         'NF': 'Norfolk Island', 
2956         'MP': 'Northern Mariana Islands', 
2961         'PS': 'Palestine, State of', 
2963         'PG': 'Papua New Guinea', 
2966         'PH': 'Philippines', 
2970         'PR': 'Puerto Rico', 
2974         'RU': 'Russian Federation', 
2976         'BL': 'Saint Barthélemy', 
2977         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
2978         'KN': 'Saint Kitts and Nevis', 
2979         'LC': 'Saint Lucia', 
2980         'MF': 'Saint Martin (French part)', 
2981         'PM': 'Saint Pierre and Miquelon', 
2982         'VC': 'Saint Vincent and the Grenadines', 
2985         'ST': 'Sao Tome and Principe', 
2986         'SA': 'Saudi Arabia', 
2990         'SL': 'Sierra Leone', 
2992         'SX': 'Sint Maarten (Dutch part)', 
2995         'SB': 'Solomon Islands', 
2997         'ZA': 'South Africa', 
2998         'GS': 'South Georgia and the South Sandwich Islands', 
2999         'SS': 'South Sudan', 
3004         'SJ': 'Svalbard and Jan Mayen', 
3007         'CH': 'Switzerland', 
3008         'SY': 'Syrian Arab Republic', 
3009         'TW': 'Taiwan, Province of China', 
3011         'TZ': 'Tanzania, United Republic of', 
3013         'TL': 'Timor-Leste', 
3017         'TT': 'Trinidad and Tobago', 
3020         'TM': 'Turkmenistan', 
3021         'TC': 'Turks and Caicos Islands', 
3025         'AE': 'United Arab Emirates', 
3026         'GB': 'United Kingdom', 
3027         'US': 'United States', 
3028         'UM': 'United States Minor Outlying Islands', 
3032         'VE': 'Venezuela, Bolivarian Republic of', 
3034         'VG': 'Virgin Islands, British', 
3035         'VI': 'Virgin Islands, U.S.', 
3036         'WF': 'Wallis and Futuna', 
3037         'EH': 'Western Sahara', 
3044     def short2full(cls, code): 
3045         """Convert an ISO 3166-2 country code to the corresponding full name""" 
3046         return cls._country_map.get(code.upper()) 
3049 class GeoUtils(object): 
3050     # Major IPv4 address blocks per country 
3052         'AD': '85.94.160.0/19', 
3053         'AE': '94.200.0.0/13', 
3054         'AF': '149.54.0.0/17', 
3055         'AG': '209.59.64.0/18', 
3056         'AI': '204.14.248.0/21', 
3057         'AL': '46.99.0.0/16', 
3058         'AM': '46.70.0.0/15', 
3059         'AO': '105.168.0.0/13', 
3060         'AP': '159.117.192.0/21', 
3061         'AR': '181.0.0.0/12', 
3062         'AS': '202.70.112.0/20', 
3063         'AT': '84.112.0.0/13', 
3064         'AU': '1.128.0.0/11', 
3065         'AW': '181.41.0.0/18', 
3066         'AZ': '5.191.0.0/16', 
3067         'BA': '31.176.128.0/17', 
3068         'BB': '65.48.128.0/17', 
3069         'BD': '114.130.0.0/16', 
3071         'BF': '129.45.128.0/17', 
3072         'BG': '95.42.0.0/15', 
3073         'BH': '37.131.0.0/17', 
3074         'BI': '154.117.192.0/18', 
3075         'BJ': '137.255.0.0/16', 
3076         'BL': '192.131.134.0/24', 
3077         'BM': '196.12.64.0/18', 
3078         'BN': '156.31.0.0/16', 
3079         'BO': '161.56.0.0/16', 
3080         'BQ': '161.0.80.0/20', 
3081         'BR': '152.240.0.0/12', 
3082         'BS': '24.51.64.0/18', 
3083         'BT': '119.2.96.0/19', 
3084         'BW': '168.167.0.0/16', 
3085         'BY': '178.120.0.0/13', 
3086         'BZ': '179.42.192.0/18', 
3087         'CA': '99.224.0.0/11', 
3088         'CD': '41.243.0.0/16', 
3089         'CF': '196.32.200.0/21', 
3090         'CG': '197.214.128.0/17', 
3091         'CH': '85.0.0.0/13', 
3092         'CI': '154.232.0.0/14', 
3093         'CK': '202.65.32.0/19', 
3094         'CL': '152.172.0.0/14', 
3095         'CM': '165.210.0.0/15', 
3096         'CN': '36.128.0.0/10', 
3097         'CO': '181.240.0.0/12', 
3098         'CR': '201.192.0.0/12', 
3099         'CU': '152.206.0.0/15', 
3100         'CV': '165.90.96.0/19', 
3101         'CW': '190.88.128.0/17', 
3102         'CY': '46.198.0.0/15', 
3103         'CZ': '88.100.0.0/14', 
3105         'DJ': '197.241.0.0/17', 
3106         'DK': '87.48.0.0/12', 
3107         'DM': '192.243.48.0/20', 
3108         'DO': '152.166.0.0/15', 
3109         'DZ': '41.96.0.0/12', 
3110         'EC': '186.68.0.0/15', 
3111         'EE': '90.190.0.0/15', 
3112         'EG': '156.160.0.0/11', 
3113         'ER': '196.200.96.0/20', 
3114         'ES': '88.0.0.0/11', 
3115         'ET': '196.188.0.0/14', 
3116         'EU': '2.16.0.0/13', 
3117         'FI': '91.152.0.0/13', 
3118         'FJ': '144.120.0.0/16', 
3119         'FM': '119.252.112.0/20', 
3120         'FO': '88.85.32.0/19', 
3122         'GA': '41.158.0.0/15', 
3124         'GD': '74.122.88.0/21', 
3125         'GE': '31.146.0.0/16', 
3126         'GF': '161.22.64.0/18', 
3127         'GG': '62.68.160.0/19', 
3128         'GH': '45.208.0.0/14', 
3129         'GI': '85.115.128.0/19', 
3130         'GL': '88.83.0.0/19', 
3131         'GM': '160.182.0.0/15', 
3132         'GN': '197.149.192.0/18', 
3133         'GP': '104.250.0.0/19', 
3134         'GQ': '105.235.224.0/20', 
3135         'GR': '94.64.0.0/13', 
3136         'GT': '168.234.0.0/16', 
3137         'GU': '168.123.0.0/16', 
3138         'GW': '197.214.80.0/20', 
3139         'GY': '181.41.64.0/18', 
3140         'HK': '113.252.0.0/14', 
3141         'HN': '181.210.0.0/16', 
3142         'HR': '93.136.0.0/13', 
3143         'HT': '148.102.128.0/17', 
3144         'HU': '84.0.0.0/14', 
3145         'ID': '39.192.0.0/10', 
3146         'IE': '87.32.0.0/12', 
3147         'IL': '79.176.0.0/13', 
3148         'IM': '5.62.80.0/20', 
3149         'IN': '117.192.0.0/10', 
3150         'IO': '203.83.48.0/21', 
3151         'IQ': '37.236.0.0/14', 
3152         'IR': '2.176.0.0/12', 
3153         'IS': '82.221.0.0/16', 
3154         'IT': '79.0.0.0/10', 
3155         'JE': '87.244.64.0/18', 
3156         'JM': '72.27.0.0/17', 
3157         'JO': '176.29.0.0/16', 
3158         'JP': '126.0.0.0/8', 
3159         'KE': '105.48.0.0/12', 
3160         'KG': '158.181.128.0/17', 
3161         'KH': '36.37.128.0/17', 
3162         'KI': '103.25.140.0/22', 
3163         'KM': '197.255.224.0/20', 
3164         'KN': '198.32.32.0/19', 
3165         'KP': '175.45.176.0/22', 
3166         'KR': '175.192.0.0/10', 
3167         'KW': '37.36.0.0/14', 
3168         'KY': '64.96.0.0/15', 
3169         'KZ': '2.72.0.0/13', 
3170         'LA': '115.84.64.0/18', 
3171         'LB': '178.135.0.0/16', 
3172         'LC': '192.147.231.0/24', 
3173         'LI': '82.117.0.0/19', 
3174         'LK': '112.134.0.0/15', 
3175         'LR': '41.86.0.0/19', 
3176         'LS': '129.232.0.0/17', 
3177         'LT': '78.56.0.0/13', 
3178         'LU': '188.42.0.0/16', 
3179         'LV': '46.109.0.0/16', 
3180         'LY': '41.252.0.0/14', 
3181         'MA': '105.128.0.0/11', 
3182         'MC': '88.209.64.0/18', 
3183         'MD': '37.246.0.0/16', 
3184         'ME': '178.175.0.0/17', 
3185         'MF': '74.112.232.0/21', 
3186         'MG': '154.126.0.0/17', 
3187         'MH': '117.103.88.0/21', 
3188         'MK': '77.28.0.0/15', 
3189         'ML': '154.118.128.0/18', 
3190         'MM': '37.111.0.0/17', 
3191         'MN': '49.0.128.0/17', 
3192         'MO': '60.246.0.0/16', 
3193         'MP': '202.88.64.0/20', 
3194         'MQ': '109.203.224.0/19', 
3195         'MR': '41.188.64.0/18', 
3196         'MS': '208.90.112.0/22', 
3197         'MT': '46.11.0.0/16', 
3198         'MU': '105.16.0.0/12', 
3199         'MV': '27.114.128.0/18', 
3200         'MW': '105.234.0.0/16', 
3201         'MX': '187.192.0.0/11', 
3202         'MY': '175.136.0.0/13', 
3203         'MZ': '197.218.0.0/15', 
3204         'NA': '41.182.0.0/16', 
3205         'NC': '101.101.0.0/18', 
3206         'NE': '197.214.0.0/18', 
3207         'NF': '203.17.240.0/22', 
3208         'NG': '105.112.0.0/12', 
3209         'NI': '186.76.0.0/15', 
3210         'NL': '145.96.0.0/11', 
3211         'NO': '84.208.0.0/13', 
3212         'NP': '36.252.0.0/15', 
3213         'NR': '203.98.224.0/19', 
3214         'NU': '49.156.48.0/22', 
3215         'NZ': '49.224.0.0/14', 
3216         'OM': '5.36.0.0/15', 
3217         'PA': '186.72.0.0/15', 
3218         'PE': '186.160.0.0/14', 
3219         'PF': '123.50.64.0/18', 
3220         'PG': '124.240.192.0/19', 
3221         'PH': '49.144.0.0/13', 
3222         'PK': '39.32.0.0/11', 
3223         'PL': '83.0.0.0/11', 
3224         'PM': '70.36.0.0/20', 
3225         'PR': '66.50.0.0/16', 
3226         'PS': '188.161.0.0/16', 
3227         'PT': '85.240.0.0/13', 
3228         'PW': '202.124.224.0/20', 
3229         'PY': '181.120.0.0/14', 
3230         'QA': '37.210.0.0/15', 
3231         'RE': '139.26.0.0/16', 
3232         'RO': '79.112.0.0/13', 
3233         'RS': '178.220.0.0/14', 
3234         'RU': '5.136.0.0/13', 
3235         'RW': '105.178.0.0/15', 
3236         'SA': '188.48.0.0/13', 
3237         'SB': '202.1.160.0/19', 
3238         'SC': '154.192.0.0/11', 
3239         'SD': '154.96.0.0/13', 
3240         'SE': '78.64.0.0/12', 
3241         'SG': '152.56.0.0/14', 
3242         'SI': '188.196.0.0/14', 
3243         'SK': '78.98.0.0/15', 
3244         'SL': '197.215.0.0/17', 
3245         'SM': '89.186.32.0/19', 
3246         'SN': '41.82.0.0/15', 
3247         'SO': '197.220.64.0/19', 
3248         'SR': '186.179.128.0/17', 
3249         'SS': '105.235.208.0/21', 
3250         'ST': '197.159.160.0/19', 
3251         'SV': '168.243.0.0/16', 
3252         'SX': '190.102.0.0/20', 
3254         'SZ': '41.84.224.0/19', 
3255         'TC': '65.255.48.0/20', 
3256         'TD': '154.68.128.0/19', 
3257         'TG': '196.168.0.0/14', 
3258         'TH': '171.96.0.0/13', 
3259         'TJ': '85.9.128.0/18', 
3260         'TK': '27.96.24.0/21', 
3261         'TL': '180.189.160.0/20', 
3262         'TM': '95.85.96.0/19', 
3263         'TN': '197.0.0.0/11', 
3264         'TO': '175.176.144.0/21', 
3265         'TR': '78.160.0.0/11', 
3266         'TT': '186.44.0.0/15', 
3267         'TV': '202.2.96.0/19', 
3268         'TW': '120.96.0.0/11', 
3269         'TZ': '156.156.0.0/14', 
3270         'UA': '93.72.0.0/13', 
3271         'UG': '154.224.0.0/13', 
3273         'UY': '167.56.0.0/13', 
3274         'UZ': '82.215.64.0/18', 
3275         'VA': '212.77.0.0/19', 
3276         'VC': '24.92.144.0/20', 
3277         'VE': '186.88.0.0/13', 
3278         'VG': '172.103.64.0/18', 
3279         'VI': '146.226.0.0/16', 
3280         'VN': '14.160.0.0/11', 
3281         'VU': '202.80.32.0/20', 
3282         'WF': '117.20.32.0/21', 
3283         'WS': '202.4.32.0/19', 
3284         'YE': '134.35.0.0/16', 
3285         'YT': '41.242.116.0/22', 
3286         'ZA': '41.0.0.0/11', 
3287         'ZM': '165.56.0.0/13', 
3288         'ZW': '41.85.192.0/19', 
    def random_ipv4(cls, code):
        """Return a random IPv4 address (text) from *code*'s country block, or None."""
        block = cls._country_ip_map.get(code.upper())
        if not block:
            return None
        addr, preflen = block.split('/')
        # Lowest address in the block, as an unsigned 32-bit integer.
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Highest address: fill all host bits with ones.
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy header overrides the handler-level proxy.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
3328 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 
3329 # released into Public Domain 
3330 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    # Emit the integer 32 bits at a time, most significant chunk first.
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes.  this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    # Left-pad with zero bytes so the length is a multiple of 4.
    if length % 4:
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    # Fold the string into the accumulator 32 bits at a time, big-endian.
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    """Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # OHDave's scheme reads the data little-endian, hence the reversal.
    message = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(message, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Per RFC 8017 (EME-PKCS1-v1_5) the padding string PS must consist of
    # *nonzero* octets: a zero byte would be mistaken for the 0x00 separator
    # and truncate the message on decryption, so draw from 1..255, not 0..254.
    # NOTE: random is not a CSPRNG; fine only for obfuscation-level use.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
3411 def encode_base_n(num
, n
, table
=None): 
3412     FULL_TABLE 
= '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 
3414         table 
= FULL_TABLE
[:n
] 
3417         raise ValueError('base %d exceeds table length %d' % (n
, len(table
))) 
3424         ret 
= table
[num 
% n
] + ret
 
def decode_packed_codes(code):
    """Unpack obfuscated JavaScript matched by PACKED_CODES_RE by
    substituting every word token with its entry from the symbol table."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each index (encoded in base `base`) to its replacement symbol;
    # an empty symbol means the token stands for itself.
    symbol_table = {}
    for idx in range(count):
        token = encode_base_n(idx, base)
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda m: symbol_table[m.group(0)],
        obfuscated_code)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted"') into a dict,
    stripping the surrounding quotes from quoted values."""
    attributes = {}
    pattern = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    for key, raw_value in re.findall(pattern, attrib):
        attributes[key] = raw_value[1:-1] if raw_value.startswith('"') else raw_value
    return attributes
def urshift(val, n):
    """Unsigned right shift of val by n bits, treating val as a 32-bit
    unsigned quantity (JavaScript's >>> operator)."""
    if val >= 0:
        return val >> n
    # Negative values are mapped into the 32-bit unsigned range first.
    return (val + 0x100000000) >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into (width, height, pixels).

    pixels is a list of rows; each row is a flat list of byte values,
    3 bytes per pixel (stride = width * 3, i.e. the code assumes 8-bit
    RGB with no interlacing — other colour types are not validated).

    Raises IOError when the PNG signature/IHDR check fails or when no
    IDAT data is found.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the fixed 8-byte PNG signature and that the first chunk
    # (bytes 4..8 of the remainder hold the chunk type) is IHDR.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Byte-length -> big-endian struct format for integer fields.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, payload,
    # 4-byte CRC. The CRC is skipped, not verified.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (guaranteed by the signature check above);
    # its first 8 bytes are the big-endian width and height.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # All IDAT chunks concatenated form a single zlib stream.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Each scanline is one filter-type byte followed by `stride` bytes.
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Fetch an already-reconstructed byte by flat (row-major) index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbour bytes used by the filters; the -3 offset is the
            # corresponding byte of the previous pixel (3 bytes/pixel).
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (https://www.w3.org/TR/PNG/#9Filters)
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper-left neighbour, available only when both
                # a left pixel and a previous row exist.
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: add the neighbour closest to p.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` (bytes) on the file at `path`.

    Implementations are tried in order: the pyxattr/xattr Python modules,
    NTFS Alternate Data Streams on Windows, then the setfattr/xattr
    command-line tools.

    Raises XAttrMetadataError when a write attempt fails and
    XAttrUnavailableError when no usable implementation is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # No Python module available: fall back to the setfattr
            # (GNU attr package) or xattr command-line tools.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # Both CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")