4 from __future__ 
import unicode_literals
 
  34 import xml
.etree
.ElementTree
 
  38     compat_HTMLParseError
, 
  43     compat_ctypes_WINFUNCTYPE
, 
  44     compat_etree_fromstring
, 
  47     compat_html_entities_html5
, 
  58     compat_urllib_parse_urlencode
, 
  59     compat_urllib_parse_urlparse
, 
  60     compat_urllib_parse_unquote_plus
, 
  61     compat_urllib_request
, 
  72 def register_socks_protocols(): 
  73     # "Register" SOCKS protocols 
  74     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  75     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  76     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  77         if scheme 
not in compat_urlparse
.uses_netloc
: 
  78             compat_urlparse
.uses_netloc
.append(scheme
) 
# This is not clearly defined otherwise
# (the type of a compiled regular-expression object, for isinstance() checks;
# there is no public name for it on older Pythons)
compiled_regex_type = type(re.compile(''))
  85     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0', 
  86     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  87     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  88     'Accept-Encoding': 'gzip, deflate', 
  89     'Accept-Language': 'en-us,en;q=0.5', 
  94     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
# Full English month names, January first; used when parsing textual dates.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
 105     'en': ENGLISH_MONTH_NAMES
, 
 107         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
 108         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 112     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 113     'flv', 'f4v', 'f4a', 'f4b', 
 114     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 115     'mkv', 'mka', 'mk3d', 
 124     'f4f', 'f4m', 'm3u8', 'smil') 
# needed for sanitizing filenames in restricted mode
# Maps each accented/special character to an ASCII replacement.  The
# multi-character replacements ('AE', 'OE', 'ss', ...) are wrapped in
# one-element lists so itertools.chain yields them as single zip items.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
 152     '%Y-%m-%d %H:%M:%S.%f', 
 155     '%Y-%m-%dT%H:%M:%SZ', 
 156     '%Y-%m-%dT%H:%M:%S.%fZ', 
 157     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 159     '%Y-%m-%dT%H:%M:%S.%f', 
 162     '%b %d %Y at %H:%M:%S', 
 164     '%B %d %Y at %H:%M:%S', 
 167 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 168 DATE_FORMATS_DAY_FIRST
.extend([ 
 177 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 178 DATE_FORMATS_MONTH_FIRST
.extend([ 
# Matches the argument list of "P.A.C.K.E.R."-style packed JavaScript:
# }('payload', radix, count, 'word0|word1|...'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Case-insensitive, dot-matches-newline pattern that captures the body of a
# <script type="application/ld+json"> element into the 'json_ld' group.
# \1 backreferences the opening quote so mixed quoting does not match.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
 190 def preferredencoding(): 
 191     """Get preferred encoding. 
 193     Returns the best encoding scheme for the system, based on 
 194     locale.getpreferredencoding() and some further tweaks. 
 197         pref = locale.getpreferredencoding() 
 205 def write_json_file(obj, fn): 
 206     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 208     fn = encodeFilename(fn) 
 209     if sys.version_info < (3, 0) and sys.platform != 'win32
': 
 210         encoding = get_filesystem_encoding() 
 211         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 212         # will fail if the filename contains non ascii characters unless we 
 213         # use a unicode object 
 214         path_basename = lambda f: os.path.basename(fn).decode(encoding) 
 215         # the same for os.path.dirname 
 216         path_dirname = lambda f: os.path.dirname(fn).decode(encoding) 
 218         path_basename = os.path.basename 
 219         path_dirname = os.path.dirname 
 223         'prefix
': path_basename(fn) + '.', 
 224         'dir': path_dirname(fn), 
 228     # In Python 2.x, json.dump expects a bytestream. 
 229     # In Python 3.x, it writes to a character stream 
 230     if sys.version_info < (3, 0): 
 238     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) 
 243         if sys.platform == 'win32
': 
 244             # Need to remove existing file on Windows, else os.rename raises 
 245             # WindowsError or FileExistsError. 
 250         os.rename(tf.name, fn) 
 259 if sys.version_info >= (2, 7): 
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Only simple attribute names (letters, '_' and '-') are supported;
        # anything else would corrupt the predicate string built below.
        assert re.match(r'^[a-zA-Z_-]+$', key)
        # With val=None the predicate matches any element that merely has
        # the attribute, regardless of its value.
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
 266     def find_xpath_attr(node, xpath, key, val=None): 
 267         for f in node.findall(compat_xpath(xpath)): 
 268             if key not in f.attrib: 
 270             if val is None or f.attrib.get(key) == val: 
 274 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 
 275 # the namespace parameter 
 278 def xpath_with_ns(path
, ns_map
): 
 279     components 
= [c
.split(':') for c 
in path
.split('/')] 
 283             replaced
.append(c
[0]) 
 286             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 287     return '/'.join(replaced
) 
 290 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 291     def _find_xpath(xpath
): 
 292         return node
.find(compat_xpath(xpath
)) 
 294     if isinstance(xpath
, (str, compat_str
)): 
 295         n 
= _find_xpath(xpath
) 
 303         if default 
is not NO_DEFAULT
: 
 306             name 
= xpath 
if name 
is None else name
 
 307             raise ExtractorError('Could not find XML element %s' % name
) 
 313 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 314     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 315     if n 
is None or n 
== default
: 
 318         if default 
is not NO_DEFAULT
: 
 321             name 
= xpath 
if name 
is None else name
 
 322             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 328 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 329     n 
= find_xpath_attr(node
, xpath
, key
) 
 331         if default 
is not NO_DEFAULT
: 
 334             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 335             raise ExtractorError('Could not find XML attribute %s' % name
) 
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Delegates to get_element_by_attribute, which returns None when no
    # matching element is found.
    return get_element_by_attribute('id', id, html)
 346 def get_element_by_class(class_name
, html
): 
 347     """Return the content of the first tag with the specified class in the passed HTML document""" 
 348     retval 
= get_elements_by_class(class_name
, html
) 
 349     return retval
[0] if retval 
else None 
 352 def get_element_by_attribute(attribute
, value
, html
, escape_value
=True): 
 353     retval 
= get_elements_by_attribute(attribute
, value
, html
, escape_value
) 
 354     return retval
[0] if retval 
else None 
 357 def get_elements_by_class(class_name
, html
): 
 358     """Return the content of all tags with the specified class in the passed HTML document as a list""" 
 359     return get_elements_by_attribute( 
 360         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 361         html, escape_value=False) 
 364 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
 365     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 367     value = re.escape(value) if escape_value else value 
 370     for m in re.finditer(r'''(?xs) 
 372          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
 
 374          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*? 
 378     ''' % (re.escape(attribute), value), html): 
 379         res = m.group('content
') 
 381         if res.startswith('"') or res.startswith("'"): 
 384         retlist.append(unescapeHTML(res)) 
 389 class HTMLAttributeParser(compat_HTMLParser): 
 390     """Trivial HTML parser to gather the attributes for a single element""" 
 393         compat_HTMLParser.__init__(self) 
 395     def handle_starttag(self, tag, attrs): 
 396         self.attrs = dict(attrs) 
 399 def extract_attributes(html_element): 
 400     """Given a string for an HTML element such as 
 402          a="foo" B="bar" c="&98;az" d=boz 
 403          empty= noval entity="&" 
 406     Decode and return a dictionary of attributes. 
 408         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 409         'empty
': '', 'noval
': None, 'entity
': '&', 
 410         'sq
': '"', 'dq': '\'' 
 412     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 413     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 415     parser = HTMLAttributeParser() 
 417         parser.feed(html_element) 
 419     # Older Python may throw HTMLParseError in case of malformed HTML 
 420     except compat_HTMLParseError: 
 425 def clean_html(html): 
 426     """Clean an HTML snippet into a readable string""" 
 428     if html is None:  # Convenience for sanitizing descriptions etc. 
 432     html = html.replace('\n', ' ') 
 433     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 434     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 436     html = re.sub('<.*?>', '', html) 
 437     # Replace html entities 
 438     html = unescapeHTML(html) 
 442 def sanitize_open(filename, open_mode): 
 443     """Try to open the given filename, and slightly tweak it if this fails. 
 445     Attempts to open the given filename. If this fails, it tries to change 
 446     the filename slightly, step by step, until it's either able to open it 
 447     or it fails and raises a final exception, like the standard open() 
 450     It returns the tuple (stream, definitive_file_name). 
 454             if sys.platform == 'win32': 
 456                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 457             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 458         stream = open(encodeFilename(filename), open_mode) 
 459         return (stream, filename) 
 460     except (IOError, OSError) as err: 
 461         if err.errno in (errno.EACCES,): 
 464         # In case of error, try to remove win32 forbidden chars 
 465         alt_filename = sanitize_path(filename) 
 466         if alt_filename == filename: 
 469             # An exception here should be caught in the caller 
 470             stream = open(encodeFilename(alt_filename), open_mode) 
 471             return (stream, alt_filename) 
 474 def timeconvert(timestr): 
 475     """Convert RFC 2822 defined time string into system timestamp""" 
 477     timetuple = email.utils.parsedate_tz(timestr) 
 478     if timetuple is not None: 
 479         timestamp = email.utils.mktime_tz(timetuple) 
 483 def sanitize_filename(s, restricted=False, is_id=False): 
 484     """Sanitizes a string so it could be used as part of a filename. 
 485     If restricted is set, use a stricter subset of allowed characters. 
 486     Set is_id if this is not an arbitrary string, but an ID that should be kept 
 489     def replace_insane(char): 
 490         if restricted and char in ACCENT_CHARS: 
 491             return ACCENT_CHARS[char] 
 492         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 495             return '' if restricted else '\'' 
 497             return '_
-' if restricted else ' -' 
 498         elif char in '\\/|
*<>': 
 500         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 502         if restricted 
and ord(char
) > 127: 
 507     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 508     result 
= ''.join(map(replace_insane
, s
)) 
 510         while '__' in result
: 
 511             result 
= result
.replace('__', '_') 
 512         result 
= result
.strip('_') 
 513         # Common case of "Foreign band name - English song title" 
 514         if restricted 
and result
.startswith('-_'): 
 516         if result
.startswith('-'): 
 517             result 
= '_' + result
[len('-'):] 
 518         result 
= result
.lstrip('.') 
 524 def sanitize_path(s
): 
 525     """Sanitizes and normalizes path on Windows""" 
 526     if sys
.platform 
!= 'win32': 
 528     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 529     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 530         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 531     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 535         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
 536         for path_part 
in norm_path
] 
 538         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 539     return os
.path
.join(*sanitized_path
) 
 542 def sanitize_url(url
): 
 543     # Prepend protocol-less URLs with `http:` scheme in order to mitigate 
 544     # the number of unwanted failures due to missing protocol 
 545     if url
.startswith('//'): 
 546         return 'http:%s' % url
 
 547     # Fix some common typos seen so far 
 549         # https://github.com/rg3/youtube-dl/issues/15649 
 550         (r
'^httpss://', r
'https://'), 
 551         # https://bx1.be/lives/direct-tv/ 
 552         (r
'^rmtp([es]?)://', r
'rtmp\1://'), 
 554     for mistake
, fixup 
in COMMON_TYPOS
: 
 555         if re
.match(mistake
, url
): 
 556             return re
.sub(mistake
, fixup
, url
) 
def sanitized_Request(url, *args, **kwargs):
    # Build a urllib Request after normalizing the URL through sanitize_url()
    # (prepends 'http:' to protocol-less URLs and fixes common scheme typos).
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 565     """Expand shell variables and ~""" 
 566     return os
.path
.expandvars(compat_expanduser(s
)) 
 569 def orderedSet(iterable
): 
 570     """ Remove all duplicates from the input iterable """ 
 578 def _htmlentity_transform(entity_with_semicolon
): 
 579     """Transforms an HTML entity to a character.""" 
 580     entity 
= entity_with_semicolon
[:-1] 
 582     # Known non-numeric HTML entity 
 583     if entity 
in compat_html_entities
.name2codepoint
: 
 584         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 586     # TODO: HTML5 allows entities without a semicolon. For example, 
 587     # 'Éric' should be decoded as 'Éric'. 
 588     if entity_with_semicolon 
in compat_html_entities_html5
: 
 589         return compat_html_entities_html5
[entity_with_semicolon
] 
 591     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 593         numstr 
= mobj
.group(1) 
 594         if numstr
.startswith('x'): 
 596             numstr 
= '0%s' % numstr
 
 599         # See https://github.com/rg3/youtube-dl/issues/7518 
 601             return compat_chr(int(numstr
, base
)) 
 605     # Unknown entity in name, return its literal representation 
 606     return '&%s;' % entity
 
 612     assert type(s
) == compat_str
 
 615         r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 618 def get_subprocess_encoding(): 
 619     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 620         # For subprocess calls, encode with locale encoding 
 621         # Refer to http://stackoverflow.com/a/9951851/35070 
 622         encoding 
= preferredencoding() 
 624         encoding 
= sys
.getfilesystemencoding() 
 630 def encodeFilename(s
, for_subprocess
=False): 
 632     @param s The name of the file 
 635     assert type(s
) == compat_str
 
 637     # Python 3 has a Unicode API 
 638     if sys
.version_info 
>= (3, 0): 
 641     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 642     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 643     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 644     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 647     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 648     if sys
.platform
.startswith('java'): 
 651     return s
.encode(get_subprocess_encoding(), 'ignore') 
 654 def decodeFilename(b
, for_subprocess
=False): 
 656     if sys
.version_info 
>= (3, 0): 
 659     if not isinstance(b
, bytes): 
 662     return b
.decode(get_subprocess_encoding(), 'ignore') 
def encodeArgument(s):
    # Encode a single command-line argument for passing to a subprocess.
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    # for_subprocess=True selects the subprocess-appropriate encoding.
    return encodeFilename(s, True)
def decodeArgument(b):
    # Inverse of encodeArgument: decode using the subprocess encoding
    # (for_subprocess=True).
    return decodeFilename(b, True)
 678 def decodeOption(optval
): 
 681     if isinstance(optval
, bytes): 
 682         optval 
= optval
.decode(preferredencoding()) 
 684     assert isinstance(optval
, compat_str
) 
 688 def formatSeconds(secs
): 
 690         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 692         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 697 def make_HTTPS_handler(params
, **kwargs
): 
 698     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 699     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 700         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 701         if opts_no_check_certificate
: 
 702             context
.check_hostname 
= False 
 703             context
.verify_mode 
= ssl
.CERT_NONE
 
 705             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 708             # (create_default_context present but HTTPSHandler has no context=) 
 711     if sys
.version_info 
< (3, 2): 
 712         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 714         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 715         context
.verify_mode 
= (ssl
.CERT_NONE
 
 716                                if opts_no_check_certificate
 
 717                                else ssl
.CERT_REQUIRED
) 
 718         context
.set_default_verify_paths() 
 719         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 722 def bug_reports_message(): 
 723     if ytdl_is_updateable(): 
 724         update_cmd 
= 'type  youtube-dl -U  to update' 
 726         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 727     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 728     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 729     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
 738 class ExtractorError(YoutubeDLError
): 
 739     """Error during info extraction.""" 
 741     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 742         """ tb, if given, is the original traceback (so that it can be printed out). 
 743         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 746         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 748         if video_id 
is not None: 
 749             msg 
= video_id 
+ ': ' + msg
 
 751             msg 
+= ' (caused by %r)' % cause
 
 753             msg 
+= bug_reports_message() 
 754         super(ExtractorError
, self
).__init
__(msg
) 
 757         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 759         self
.video_id 
= video_id
 
 761     def format_traceback(self
): 
 762         if self
.traceback 
is None: 
 764         return ''.join(traceback
.format_tb(self
.traceback
)) 
class UnsupportedError(ExtractorError):
    # Raised for URLs that no extractor can handle; expected=True marks it
    # as a normal condition rather than a youtube-dl bug.
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
 779 class GeoRestrictedError(ExtractorError
): 
 780     """Geographic restriction Error exception. 
 782     This exception may be thrown when a video is not available from your 
 783     geographic location due to geographic restrictions imposed by a website. 
 785     def __init__(self
, msg
, countries
=None): 
 786         super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True) 
 788         self
.countries 
= countries
 
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Preserved so callers can inspect or re-raise the root cause.
        self.exc_info = exc_info
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
 814 class PostProcessingError(YoutubeDLError
): 
 815     """Post Processing exception. 
 817     This exception may be raised by PostProcessor's .run() method to 
 818     indicate an error in the postprocessing task. 
 821     def __init__(self
, msg
): 
 822         super(PostProcessingError
, self
).__init
__(msg
) 
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both counts are kept on the instance for callers to inspect.
        self.downloaded = downloaded  # bytes actually received
        self.expected = expected  # bytes the server announced
 857 class XAttrMetadataError(YoutubeDLError
): 
 858     def __init__(self
, code
=None, msg
='Unknown error'): 
 859         super(XAttrMetadataError
, self
).__init
__(msg
) 
 863         # Parsing code and msg 
 864         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 865                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 866             self
.reason 
= 'NO_SPACE' 
 867         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 868             self
.reason 
= 'VALUE_TOO_LONG' 
 870             self
.reason 
= 'NOT_SUPPORTED' 
class XAttrUnavailableError(YoutubeDLError):
    """Extended-attribute (xattr) support is unavailable."""
    # NOTE(review): raised by xattr helpers elsewhere in this module when no
    # usable implementation is present — confirm against the call sites.
 877 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 878     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 879     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 880     # https://github.com/rg3/youtube-dl/issues/6727) 
 881     if sys
.version_info 
< (3, 0): 
 882         kwargs
['strict'] = True 
 883     hc 
= http_class(*args
, **compat_kwargs(kwargs
)) 
 884     source_address 
= ydl_handler
._params
.get('source_address') 
 886     if source_address 
is not None: 
 887         # This is to workaround _create_connection() from socket where it will try all 
 888         # address data from getaddrinfo() including IPv6. This filters the result from 
 889         # getaddrinfo() based on the source_address value. 
 890         # This is based on the cpython socket.create_connection() function. 
 891         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 
 892         def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None): 
 895             addrs 
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
) 
 896             af 
= socket
.AF_INET 
if '.' in source_address
[0] else socket
.AF_INET6
 
 897             ip_addrs 
= [addr 
for addr 
in addrs 
if addr
[0] == af
] 
 898             if addrs 
and not ip_addrs
: 
 899                 ip_version 
= 'v4' if af 
== socket
.AF_INET 
else 'v6' 
 901                     "No remote IP%s addresses available for connect, can't use '%s' as source address" 
 902                     % (ip_version
, source_address
[0])) 
 904                 af
, socktype
, proto
, canonname
, sa 
= res
 
 907                     sock 
= socket
.socket(af
, socktype
, proto
) 
 908                     if timeout 
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
: 
 909                         sock
.settimeout(timeout
) 
 910                     sock
.bind(source_address
) 
 912                     err 
= None  # Explicitly break reference cycle 
 914                 except socket
.error 
as _
: 
 921                 raise socket
.error('getaddrinfo returns an empty list') 
 922         if hasattr(hc
, '_create_connection'): 
 923             hc
._create
_connection 
= _create_connection
 
 924         sa 
= (source_address
, 0) 
 925         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 926             hc
.source_address 
= sa
 
 928             def _hc_connect(self
, *args
, **kwargs
): 
 929                 sock 
= _create_connection( 
 930                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 932                     self
.sock 
= ssl
.wrap_socket( 
 933                         sock
, self
.key_file
, self
.cert_file
, 
 934                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 937             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 942 def handle_youtubedl_headers(headers
): 
 943     filtered_headers 
= headers
 
 945     if 'Youtubedl-no-compression' in filtered_headers
: 
 946         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 947         del filtered_headers
['Youtubedl-no-compression'] 
 949     return filtered_headers
 
 952 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 953     """Handler for HTTP requests and responses. 
 955     This class, when installed with an OpenerDirector, automatically adds 
 956     the standard headers to every HTTP request and handles gzipped and 
 957     deflated responses from web servers. If compression is to be avoided in 
 958     a particular request, the original request in the program code only has 
 959     to include the HTTP header "Youtubedl-no-compression", which will be 
 960     removed before making the real request. 
 962     Part of this code was copied from: 
 964     http://techknack.net/python-urllib2-handlers/ 
 966     Andrew Rowls, the author of that code, agreed to release it to the 
 970     def __init__(self
, params
, *args
, **kwargs
): 
 971         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 972         self
._params 
= params
 
 974     def http_open(self
, req
): 
 975         conn_class 
= compat_http_client
.HTTPConnection
 
 977         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 979             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 980             del req
.headers
['Ytdl-socks-proxy'] 
 982         return self
.do_open(functools
.partial( 
 983             _create_http_connection
, self
, conn_class
, False), 
 989             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 991             return zlib
.decompress(data
) 
 993     def http_request(self
, req
): 
 994         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 995         # always respected by websites, some tend to give out URLs with non percent-encoded 
 996         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 997         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 998         # To work around aforementioned issue we will replace request's original URL with 
 999         # percent-encoded one 
1000         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
1001         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
1002         url 
= req
.get_full_url() 
1003         url_escaped 
= escape_url(url
) 
1005         # Substitute URL if any change after escaping 
1006         if url 
!= url_escaped
: 
1007             req 
= update_Request(req
, url
=url_escaped
) 
1009         for h
, v 
in std_headers
.items(): 
1010             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
1011             # The dict keys are capitalized because of this bug by urllib 
1012             if h
.capitalize() not in req
.headers
: 
1013                 req
.add_header(h
, v
) 
1015         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
1017         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
1018             # Python 2.6 is brain-dead when it comes to fragments 
1019             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
1020             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
1024     def http_response(self
, req
, resp
): 
1027         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
1028             content 
= resp
.read() 
1029             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
1031                 uncompressed 
= io
.BytesIO(gz
.read()) 
1032             except IOError as original_ioerror
: 
1033                 # There may be junk add the end of the file 
1034                 # See http://stackoverflow.com/q/4928560/35070 for details 
1035                 for i 
in range(1, 1024): 
1037                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
1038                         uncompressed 
= io
.BytesIO(gz
.read()) 
1043                     raise original_ioerror
 
1044             resp 
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
1045             resp
.msg 
= old_resp
.msg
 
1046             del resp
.headers
['Content-encoding'] 
1048         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
1049             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
1050             resp 
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
1051             resp
.msg 
= old_resp
.msg
 
1052             del resp
.headers
['Content-encoding'] 
1053         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
1054         # https://github.com/rg3/youtube-dl/issues/6457). 
1055         if 300 <= resp
.code 
< 400: 
1056             location 
= resp
.headers
.get('Location') 
1058                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
1059                 if sys
.version_info 
>= (3, 0): 
1060                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
1062                     location 
= location
.decode('utf-8') 
1063                 location_escaped 
= escape_url(location
) 
1064                 if location 
!= location_escaped
: 
1065                     del resp
.headers
['Location'] 
1066                     if sys
.version_info 
< (3, 0): 
1067                         location_escaped 
= location_escaped
.encode('utf-8') 
1068                     resp
.headers
['Location'] = location_escaped
 
1071     https_request 
= http_request
 
1072     https_response 
= http_response
 
1075 def make_socks_conn_class(base_class
, socks_proxy
): 
1076     assert issubclass(base_class
, ( 
1077         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
1079     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
1080     if url_components
.scheme
.lower() == 'socks5': 
1081         socks_type 
= ProxyType
.SOCKS5
 
1082     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
1083         socks_type 
= ProxyType
.SOCKS4
 
1084     elif url_components
.scheme
.lower() == 'socks4a': 
1085         socks_type 
= ProxyType
.SOCKS4A
 
1087     def unquote_if_non_empty(s
): 
1090         return compat_urllib_parse_unquote_plus(s
) 
1094         url_components
.hostname
, url_components
.port 
or 1080, 
1096         unquote_if_non_empty(url_components
.username
), 
1097         unquote_if_non_empty(url_components
.password
), 
1100     class SocksConnection(base_class
): 
1102             self
.sock 
= sockssocket() 
1103             self
.sock
.setproxy(*proxy_args
) 
1104             if type(self
.timeout
) in (int, float): 
1105                 self
.sock
.settimeout(self
.timeout
) 
1106             self
.sock
.connect((self
.host
, self
.port
)) 
1108             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1109                 if hasattr(self
, '_context'):  # Python > 2.6 
1110                     self
.sock 
= self
._context
.wrap_socket( 
1111                         self
.sock
, server_hostname
=self
.host
) 
1113                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1115     return SocksConnection
 
1118 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
1119     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
1120         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
1121         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
1122         self
._params 
= params
 
1124     def https_open(self
, req
): 
1126         conn_class 
= self
._https
_conn
_class
 
1128         if hasattr(self
, '_context'):  # python > 2.6 
1129             kwargs
['context'] = self
._context
 
1130         if hasattr(self
, '_check_hostname'):  # python 3.x 
1131             kwargs
['check_hostname'] = self
._check
_hostname
 
1133         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
1135             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1136             del req
.headers
['Ytdl-socks-proxy'] 
1138         return self
.do_open(functools
.partial( 
1139             _create_http_connection
, self
, conn_class
, True), 
1143 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
): 
1144     def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False): 
1145         # Store session cookies with `expires` set to 0 instead of an empty 
1148             if cookie
.expires 
is None: 
1150         compat_cookiejar
.MozillaCookieJar
.save(self
, filename
, ignore_discard
, ignore_expires
) 
1152     def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False): 
1153         compat_cookiejar
.MozillaCookieJar
.load(self
, filename
, ignore_discard
, ignore_expires
) 
1154         # Session cookies are denoted by either `expires` field set to 
1155         # an empty string or 0. MozillaCookieJar only recognizes the former 
1156         # (see [1]). So we need force the latter to be recognized as session 
1157         # cookies on our own. 
1158         # Session cookies may be important for cookies-based authentication, 
1159         # e.g. usually, when user does not check 'Remember me' check box while 
1160         # logging in on a site, some important cookies are stored as session 
1161         # cookies so that not recognizing them will result in failed login. 
1162         # 1. https://bugs.python.org/issue17164 
1164             # Treat `expires=0` cookies as session cookies 
1165             if cookie
.expires 
== 0: 
1166                 cookie
.expires 
= None 
1167                 cookie
.discard 
= True 
1170 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
1171     def __init__(self
, cookiejar
=None): 
1172         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
1174     def http_response(self
, request
, response
): 
1175         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
1176         # characters in Set-Cookie HTTP header of last response (see 
1177         # https://github.com/rg3/youtube-dl/issues/6769). 
1178         # In order to at least prevent crashing we will percent encode Set-Cookie 
1179         # header before HTTPCookieProcessor starts processing it. 
1180         # if sys.version_info < (3, 0) and response.headers: 
1181         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
1182         #         set_cookie = response.headers.get(set_cookie_header) 
1184         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
1185         #             if set_cookie != set_cookie_escaped: 
1186         #                 del response.headers[set_cookie_header] 
1187         #                 response.headers[set_cookie_header] = set_cookie_escaped 
1188         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
1190     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
1191     https_response 
= http_response
 
def extract_timezone(date_str):
    """Split a trailing UTC offset (or 'Z') off *date_str*.

    Returns a (timedelta, remaining_date_str) pair; the delta is zero when
    no timezone suffix is found.
    """
    tz_m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_m:
        return datetime.timedelta(), date_str
    # Strip the matched suffix before the caller parses the rest.
    date_str = date_str[:-len(tz_m.group('tz'))]
    if not tz_m.group('sign'):
        # A bare 'Z' means UTC: zero offset.
        return datetime.timedelta(), date_str
    direction = 1 if tz_m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_m.group('hours')),
        minutes=direction * int(tz_m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime cannot digest fractional seconds here; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        # Pull the UTC offset out of the string itself.
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        moment = datetime.datetime.strptime(date_str, fmt) - timezone
        return calendar.timegm(moment.timetuple())
    except ValueError:
        # Unparseable date: signal failure with an implicit None.
        pass
def date_formats(day_first=True):
    """Pick the strptime format list matching day/month ordering."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Commas carry no information for any supported format.
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NOTE(review): no break on success — a later matching format overwrites
    # an earlier one; this mirrors the upstream behaviour.
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822-style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None."""
    if date_str is None:
        return None

    # Commas and pipes carry no information.
    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker shifts the parsed hour by 12.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822-style dates.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Take whatever follows the last dot, ignoring any query string.
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    if candidate.rstrip('/') in KNOWN_EXTENSIONS:
        return candidate.rstrip('/')
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build '<base>.<lang>.<format>' from a media filename."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    rel = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if rel is not None:
        amount = int(rel.group('time'))
        if rel.group('sign') == '-':
            amount = -amount
        unit = rel.group('unit')
        # A bad approximation? Months and years become 30/365 days.
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        return today + datetime.timedelta(**{unit + 's': amount})
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        # Anything that is not a plain YYYYMMDD string passes through.
        return date_str
    return '-'.join(parts.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest representable range.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Some Python 2 setups hand back bytes; normalize to text.
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name
1393 def _windows_write_string(s
, out
): 
1394     """ Returns True if the string was written using special methods, 
1395     False if it has yet to be written out.""" 
1396     # Adapted from http://stackoverflow.com/a/3259271/35070 
1399     import ctypes
.wintypes
 
1407         fileno 
= out
.fileno() 
1408     except AttributeError: 
1409         # If the output stream doesn't have a fileno, it's virtual 
1411     except io
.UnsupportedOperation
: 
1412         # Some strange Windows pseudo files? 
1414     if fileno 
not in WIN_OUTPUT_IDS
: 
1417     GetStdHandle 
= compat_ctypes_WINFUNCTYPE( 
1418         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1419         ('GetStdHandle', ctypes
.windll
.kernel32
)) 
1420     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1422     WriteConsoleW 
= compat_ctypes_WINFUNCTYPE( 
1423         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1424         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1425         ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
)) 
1426     written 
= ctypes
.wintypes
.DWORD(0) 
1428     GetFileType 
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
)) 
1429     FILE_TYPE_CHAR 
= 0x0002 
1430     FILE_TYPE_REMOTE 
= 0x8000 
1431     GetConsoleMode 
= compat_ctypes_WINFUNCTYPE( 
1432         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1433         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1434         ('GetConsoleMode', ctypes
.windll
.kernel32
)) 
1435     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1437     def not_a_console(handle
): 
1438         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1440         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1441                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1443     if not_a_console(h
): 
1446     def next_nonbmp_pos(s
): 
1448             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1449         except StopIteration: 
1453         count 
= min(next_nonbmp_pos(s
), 1024) 
1455         ret 
= WriteConsoleW( 
1456             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1458             raise OSError('Failed to write string') 
1459         if not count
:  # We just wrote a non-BMP character 
1460             assert written
.value 
== 2 
1463             assert written
.value 
> 0 
1464             s 
= s
[written
.value
:] 
1468 def write_string(s
, out
=None, encoding
=None): 
1471     assert type(s
) == compat_str
 
1473     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
1474         if _windows_write_string(s
, out
): 
1477     if ('b' in getattr(out
, 'mode', '') or 
1478             sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
1479         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
1481     elif hasattr(out
, 'buffer'): 
1482         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
1483         byt 
= s
.encode(enc
, 'ignore') 
1484         out
.buffer.write(byt
) 
def bytes_to_intlist(bs):
    """List of integer byte values for a bytes (or Python 2 str) object."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Python 2 str iterates as 1-char strings; convert each to its ordinal.
    return [ord(ch) for ch in bs]
def intlist_to_bytes(xs):
    """Inverse of bytes_to_intlist: pack integer byte values into bytes."""
    if not xs:
        return b''
    # '%dB' packs each value as one unsigned byte.
    return compat_struct_pack('%dB' % len(xs), *xs)
1505 # Cross-platform file locking 
1506 if sys
.platform 
== 'win32': 
1507     import ctypes
.wintypes
 
1510     class OVERLAPPED(ctypes
.Structure
): 
1512             ('Internal', ctypes
.wintypes
.LPVOID
), 
1513             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1514             ('Offset', ctypes
.wintypes
.DWORD
), 
1515             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1516             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1519     kernel32 
= ctypes
.windll
.kernel32
 
1520     LockFileEx 
= kernel32
.LockFileEx
 
1521     LockFileEx
.argtypes 
= [ 
1522         ctypes
.wintypes
.HANDLE
,     # hFile 
1523         ctypes
.wintypes
.DWORD
,      # dwFlags 
1524         ctypes
.wintypes
.DWORD
,      # dwReserved 
1525         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1526         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1527         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1529     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1530     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1531     UnlockFileEx
.argtypes 
= [ 
1532         ctypes
.wintypes
.HANDLE
,     # hFile 
1533         ctypes
.wintypes
.DWORD
,      # dwReserved 
1534         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1535         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1536         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1538     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1539     whole_low 
= 0xffffffff 
1540     whole_high 
= 0x7fffffff 
1542     def _lock_file(f
, exclusive
): 
1543         overlapped 
= OVERLAPPED() 
1544         overlapped
.Offset 
= 0 
1545         overlapped
.OffsetHigh 
= 0 
1546         overlapped
.hEvent 
= 0 
1547         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1548         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1549         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1550                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1551             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1553     def _unlock_file(f
): 
1554         assert f
._lock
_file
_overlapped
_p
 
1555         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1556         if not UnlockFileEx(handle
, 0, 
1557                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1558             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1561     # Some platforms, such as Jython, is missing fcntl 
1565         def _lock_file(f
, exclusive
): 
1566             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1568         def _unlock_file(f
): 
1569             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1571         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1573         def _lock_file(f
, exclusive
): 
1574             raise IOError(UNSUPPORTED_MSG
) 
1576         def _unlock_file(f
): 
1577             raise IOError(UNSUPPORTED_MSG
) 
class locked_file(object):
    # File wrapper that holds an OS-level advisory lock for the duration of
    # a `with` block; read-only mode takes a shared lock, write modes an
    # exclusive one.
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Locking failed; do not leak the open handle.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Filesystem encoding, falling back to UTF-8 when undetectable."""
    enc = sys.getfilesystemencoding()
    return 'utf-8' if enc is None else enc
def shell_quote(args):
    """Quote each argument so the list is safe to paste into a shell."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(compat_shlex_quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge any data already smuggled into the URL so nothing is lost.
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url: return (clean_url, data) or (url, default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Render a byte count as a human-readable string, e.g. '1.00KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log(0) is undefined; zero bytes stay in the plain 'B' bucket.
    exp = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    unit = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exp]
    return '%.2f%s' % (float(bytes) / float(1024 ** exp), unit)
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' from *s* using *unit_table* multipliers."""
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    # European-style decimal commas are normalized to dots.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
1671 def parse_filesize(s
): 
1675     # The lower-case forms are of course incorrect and unofficial, 
1676     # but we support those too 
1693         'megabytes': 1000 ** 2, 
1694         'mebibytes': 1024 ** 2, 
1700         'gigabytes': 1000 ** 3, 
1701         'gibibytes': 1024 ** 3, 
1707         'terabytes': 1000 ** 4, 
1708         'tebibytes': 1024 ** 4, 
1714         'petabytes': 1000 ** 5, 
1715         'pebibytes': 1024 ** 5, 
1721         'exabytes': 1000 ** 6, 
1722         'exbibytes': 1024 ** 6, 
1728         'zettabytes': 1000 ** 7, 
1729         'zebibytes': 1024 ** 7, 
1735         'yottabytes': 1000 ** 8, 
1736         'yobibytes': 1024 ** 8, 
1739     return lookup_unit_table(_UNIT_TABLE
, s
) 
1748     if re
.match(r
'^[\d,.]+$', s
): 
1749         return str_to_int(s
) 
1760     return lookup_unit_table(_UNIT_TABLE
, s
) 
def parse_resolution(s):
    """Extract {'width': ..., 'height': ...} from a resolution-like string."""
    if s is None:
        return {}

    # '1920x1080' style (also with the Unicode multiplication sign).
    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    # '720p' / '1080i' scan labels carry only the height.
    scan = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan:
        return {'height': int(scan.group(1))}

    # '4k' / '8k' marketing labels: 4k == 2160 lines.
    kmark = re.search(r'\b([48])[kK]\b', s)
    if kmark:
        return {'height': int(kmark.group(1)) * 540}

    return {}
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Unknown languages fall back to the English month list.
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        # Month numbers are 1-based.
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace every stray '&' by '&amp;' in XML, leaving entities intact."""
    # The negative lookahead skips ampersands already starting an entity.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
1814 def setproctitle(title
): 
1815     assert isinstance(title
, compat_str
) 
1817     # ctypes in Jython is not complete 
1818     # http://bugs.jython.org/issue2148 
1819     if sys
.platform
.startswith('java'): 
1823         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
1827         # LoadLibrary in Windows Python 2.7.13 only expects 
1828         # a bytestring, but since unicode_literals turns 
1829         # every string into a unicode string, it fails. 
1831     title_bytes 
= title
.encode('utf-8') 
1832     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1833     buf
.value 
= title_bytes
 
1835         libc
.prctl(15, buf
, 0, 0, 0) 
1836     except AttributeError: 
1837         return  # Strange libc, just skip this 
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present; None-safe."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip *end* from the end of *s* when present; None-safe."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Drop one layer of matching single or double quotes around *s*."""
    if s is None or len(s) < 2:
        return s
    for q in ('"', "'"):
        if s[0] == q and s[-1] == q:
            return s[1:-1]
    return s
def url_basename(url):
    """Last path component of *url*, ignoring query and fragment."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
def base_url(url):
    """URL up to (and including) the final '/' before query/fragment."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
def urljoin(base, path):
    """Resolve *path* against *base*; tolerates bytes and returns None on junk."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:https?:)?//', path):
        # Path is already absolute (or protocol-relative).
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that issues HEAD instead of GET.
    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that issues PUT instead of GET.
    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Lenient int conversion with optional attribute lookup and scaling."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    # Empty strings and missing values both map to the default.
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Stringify *v* unless it is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Thousands separators ('.', ',') and a leading '+' are tolerated.
    return int(re.sub(r'[,\.\+]', '', int_str))
def float_or_none(v, scale=1, invscale=1, default=None):
    """Lenient float conversion with scaling; *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # Not convertible (e.g. garbage string or incompatible type).
        return default
def bool_or_none(v, default=None):
    """Pass booleans through; anything else becomes *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v):
    """str.strip that tolerates None."""
    if v is None:
        return None
    return v.strip()
def url_or_none(url):
    """Return *url* if it looks like a URL (scheme-relative allowed)."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url):
        return url
    return None
1941 def parse_duration(s
): 
1942     if not isinstance(s
, compat_basestring
): 
1947     days
, hours
, mins
, secs
, ms 
= [None] * 5 
1948     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
) 
1950         days
, hours
, mins
, secs
, ms 
= m
.groups() 
1955                     [0-9]+\s*y(?:ears?)?\s* 
1958                     [0-9]+\s*m(?:onths?)?\s* 
1961                     [0-9]+\s*w(?:eeks?)?\s* 
1964                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
1968                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
1971                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
1974                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
1977             days
, hours
, mins
, secs
, ms 
= m
.groups() 
1979             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
) 
1981                 hours
, mins 
= m
.groups() 
1987         duration 
+= float(secs
) 
1989         duration 
+= float(mins
) * 60 
1991         duration 
+= float(hours
) * 60 * 60 
1993         duration 
+= float(days
) * 24 * 60 * 60 
1995         duration 
+= float(ms
) 
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension: a.mp4 -> a.<ext>.mp4.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the extension of *filename* for *ext*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    matches = not expected_real_ext or real_ext[1:] == expected_real_ext
    return '{0}.{1}'.format(name if matches else filename, ext)
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        # Output is discarded; only launchability matters.
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        # Not found / not executable.
        return False
    return exe
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        # Binary not found (or not executable).
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Pull a version string out of '--version'-style *output*."""
    assert isinstance(output, compat_str)
    if version_re is None:
        # Default: 'version <token>' anywhere in the output.
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
class PagedList(object):
    # Abstract base for lazily-fetched paginated result lists; subclasses
    # implement getslice(start, end).
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
2060 class OnDemandPagedList(PagedList
): 
2061     def __init__(self
, pagefunc
, pagesize
, use_cache
=True): 
2062         self
._pagefunc 
= pagefunc
 
2063         self
._pagesize 
= pagesize
 
2064         self
._use
_cache 
= use_cache
 
2068     def getslice(self
, start
=0, end
=None): 
2070         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
2071             firstid 
= pagenum 
* self
._pagesize
 
2072             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
2073             if start 
>= nextfirstid
: 
2078                 page_results 
= self
._cache
.get(pagenum
) 
2079             if page_results 
is None: 
2080                 page_results 
= list(self
._pagefunc
(pagenum
)) 
2082                 self
._cache
[pagenum
] = page_results
 
2085                 start 
% self
._pagesize
 
2086                 if firstid 
<= start 
< nextfirstid
 
2090                 ((end 
- 1) % self
._pagesize
) + 1 
2091                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
2094             if startv 
!= 0 or endv 
is not None: 
2095                 page_results 
= page_results
[startv
:endv
] 
2096             res
.extend(page_results
) 
2098             # A little optimization - if current page is not "full", ie. does 
2099             # not contain page_size videos then we can assume that this page 
2100             # is the last one - there are no more ids on further pages - 
2101             # i.e. no need to query again. 
2102             if len(page_results
) + startv 
< self
._pagesize
: 
2105             # If we got the whole page, but the next page is not interesting, 
2106             # break out early as well 
2107             if end 
== nextfirstid
: 
class InAdvancePagedList(PagedList):
    """PagedList variant for sources where the total page count is known
    up front, so the page range to fetch can be computed in advance."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc    # callable: page number -> iterable of items
        self._pagecount = pagecount  # total number of pages
        self._pagesize = pagesize    # items per (full) page

    def getslice(self, start=0, end=None):
        """Return the items in [start, end); end=None means 'to the end'."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading items of the first page that fall before `start`.
        skip_elems = start - start_page * self._pagesize
        # Items still wanted, or None for "all remaining".
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the request: trim and stop.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
def uppercase_escape(s):
    """Expand literal '\\UXXXXXXXX' escape sequences appearing in *s*."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
def lowercase_escape(s):
    """Expand literal '\\uXXXX' escape sequences appearing in *s*."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() cannot handle unicode input directly,
    # so pre-encode to UTF-8 bytes there.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Characters that keep their literal meaning in a URL.
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Percent-escape every textual component; the host gets IDNA-encoded
    # instead, since percent-escaping is not valid there.
    escaped = {
        field: escape_rfc3986(getattr(parts, field))
        for field in ('path', 'params', 'query', 'fragment')
    }
    escaped['netloc'] = parts.netloc.encode('idna').decode('ascii')
    return parts._replace(**escaped).geturl()
def read_batch_urls(batch_fd):
    """Read a batch file object and return its list of URLs, dropping a
    UTF-8 BOM, surrounding whitespace and comment lines ('#', ';', ']')."""
    BOM_UTF8 = '\xef\xbb\xbf'

    def sanitize(line):
        # Python 2 file objects yield byte strings; decode them first.
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        if line.startswith(BOM_UTF8):
            line = line[len(BOM_UTF8):]
        line = line.strip()
        if line.startswith(('#', ';', ']')):
            return False
        return line

    with contextlib.closing(batch_fd) as fd:
        return [line for line in map(sanitize, fd) if line]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url_query(url, query):
    """Return *url* with the key/value pairs from *query* merged into its
    query string (existing keys are overwritten)."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parsed._replace(query=new_query))
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Return a new request cloned from *req* with any of URL, POST data,
    headers or query-string parameters replaced/merged.

    The request class (HEADRequest/PUTRequest/plain Request) is chosen so
    that the clone keeps the original HTTP method.
    """
    # Fixed: `headers={}` / `query={}` were mutable default arguments,
    # shared between calls; use None sentinels instead.
    req_headers = req.headers.copy()
    req_headers.update(headers or {})
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query or {})
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    # `timeout` is not a standard Request attribute; carry it over if set.
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Serialize dict *data* as a multipart/form-data body using *boundary*.

    Returns (body_bytes, content_type). Raises ValueError when the boundary
    occurs inside the payload.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = (
            b'Content-Disposition: form-data; name="' + name
            + b'"\r\n\r\n' + value + b'\r\n')
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    user_boundary = boundary

    while True:
        if boundary is None:
            boundary = '---------------' + str(
                random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # A randomly generated boundary collided with the payload:
            # retry with a fresh one. A caller-supplied boundary is a
            # hard error instead.
            if user_boundary is not None:
                raise
            boundary = None
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up a key — or the first usable key of a list/tuple of keys — in
    dict *d*.

    None values are always skipped; falsy values ('' , 0, []) are skipped
    too unless skip_false_values is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
def try_get(src, getter, expected_type=None):
    """Apply one or several getter callables to *src*, returning the first
    result that succeeds (and matches *expected_type*, when given).

    Lookup errors (AttributeError/KeyError/TypeError/IndexError) are
    swallowed; returns None when nothing succeeds.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            value = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
def merge_dicts(*dicts):
    """Merge several dicts left-to-right.

    Earlier dicts win, with one exception: an empty string already stored
    may be replaced by a later non-empty string. None values never
    participate.
    """
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            is_absent = key not in merged
            upgrades_empty_str = (
                isinstance(value, compat_str) and value
                and isinstance(merged.get(key), compat_str)
                and not merged[key])
            if is_absent or upgrades_empty_str:
                merged[key] = value
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as a text (compat_str) object, decoding byte strings
    with *encoding*.

    NOTE: the default *encoding* is evaluated once, at import time.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2328 TV_PARENTAL_GUIDELINES 
= { 
def parse_age_limit(s):
    """Parse an age limit (int, '18', '18+', US rating or TV rating string)
    into an int between 0 and 21, or None when unparseable."""
    # type() rather than isinstance(): booleans are ints but are not ages.
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(
        r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP wrapper — `func(...)` / `window.func(...);` plus
    trailing line comments — leaving just the payload."""
    JSONP_WRAPPER_RE = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_WRAPPER_RE, r'\g<callback_data>', code)
def js_to_json(code):
    """Convert a JavaScript object/value literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Integer forms JS accepts but JSON does not: hex and legacy octal.
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and dangling commas are dropped entirely.
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to JSON's double-quoted conventions.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # A trailing ':' means the number was used as an object key.
                return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers (e.g. unquoted object keys) get quoted.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    # The returned callable ranks a quality id by its position in
    # quality_ids; unknown ids rank as -1 (worst).
    def rank(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return rank
# Default output filename template: "<title>-<id>.<ext>".
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    ellipses = '...'
    if s is None:
        return None
    if len(s) <= length:
        return s
    # Truncate so the result, including the ellipses, fits in `length`.
    return s[:length - len(ellipses)] + ellipses
def version_tuple(v):
    """Split a version string like '2016.08.05' or '10.1-6' into a tuple
    of ints, treating '.' and '-' as separators."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """Compare two dotted version strings; True when *version* < *limit*.

    When *version* is empty or unparseable the answer depends on
    *assume_new*: an unknown version is treated as new by default.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updateable when running from the zip bundle or a frozen executable.
    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    running_frozen = hasattr(sys, 'frozen')
    return running_from_zip or running_frozen
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Return the message of exception *err* as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
def mimetype2ext(mt):
    """Map a MIME type to a file extension, falling back to the (cleaned)
    subtype itself when no explicit mapping exists."""
    if mt is None:
        return None

    # Full-type mappings checked first.
    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    # Keep only the subtype, stripping any parameters (e.g. '; charset=...').
    _, _, res = mt.rpartition('/')
    res = res.split(';')[0].strip().lower()

    # NOTE(review): table entries not visible in this copy were restored from
    # the upstream project — confirm against the original file.
    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    splited_codecs = list(filter(None, map(
        lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in splited_codecs:
        # Only the registration id (before the first '.') identifies the codec.
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: guess by position/count.
        if len(splited_codecs) == 2:
            return {
                'vcodec': splited_codecs[0],
                'acodec': splited_codecs[1],
            }
        elif len(splited_codecs) == 1:
            # NOTE(review): a single unrecognized codec is assumed to be
            # audio-only here — confirm against callers.
            return {
                'vcodec': 'none',
                'acodec': splited_codecs[0],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for an HTTP response: first from the
    Content-Disposition filename, then from the Content-Type."""
    getheader = url_handle.headers.get

    content_disposition = getheader('Content-Disposition')
    if content_disposition:
        mobj = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"',
            content_disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Pack bytes *data* into an RFC 2397 base64 'data:' URI."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No age cap requested, or content carries no rating: never block.
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Recognized byte-order marks, longest first so UTF-32 wins over UTF-16.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
def determine_protocol(info_dict):
    """Work out the download protocol for *info_dict*: an explicit
    'protocol' field wins, then the URL scheme prefix, then the file
    extension, then the parsed URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Widest cell of each column determines that column's width.
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)
    ]
    # Left-justify every column but the last, with one space of gap.
    fmt = ' '.join(
        '%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
def _match_one(filter_part, dct):
    """Evaluate a single filter expression (e.g. 'duration > 600',
    'title = x', '!is_live') against dict *dct*."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/rg3/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            # String comparison: only equality operators make sense.
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes escaped inside the quoted value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a file size ('500KiB', '2M'...).
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field matches only when the '?' suffix was given.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
def match_filter_func(filter_str):
    """Build a match-filter callable: returns None when the video passes
    *filter_str*, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return

    # Plain offset, optionally suffixed with 's': '12', '1.5s'.
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock format HH:MM:SS[.fff] (or HH:MM:SS:fff with ':' as the
    # fraction separator).
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        return (3600 * int(hours) + 60 * int(minutes)
                + float(seconds.replace(':', '.')))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates the float parts, matching SRT's integer fields.
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML namespaces are rewritten to the current ones so a single
    # set of xpath expressions works for all inputs.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}         # style id -> {prop: value}
    default_style = {}  # style inherited from body/div

    class TTMLPElementParser(object):
        # Streaming target for XMLParser: converts one <p> element into
        # SRT-flavored markup (<font>, <b>, <i>, <u>, newlines).
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements; repeat while parent styles are defined
    # after the styles referencing them.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on <body> or <div> becomes the default for all paragraphs.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param):
    """Return [command_option, value] when params[param] is set, else []."""
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as CLI arguments.

    Returns [] when unset; ['--opt', 'true'] normally; ['--opt=true'] when
    a separator is given.
    """
    setting = params.get(param)
    if setting is None:
        return []
    assert isinstance(setting, bool)
    rendered = true_value if setting else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value,
    otherwise []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=None):
    """Fetch the extra-arguments list stored under *param* in *params*.

    Returns *default* (a fresh empty list when not given) if the key is
    unset; otherwise the stored value, which must be a list.
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Fixed: the old `default=[]` was a shared mutable default that was
        # returned directly, so a caller mutating the result corrupted
        # every later call. Hand out a fresh list instead.
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
2911 class ISO639Utils(object): 
2912     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
2971         'iw': 'heb',  # Replaced by he in 1989 revision 
2981         'in': 'ind',  # Replaced by id in 1989 revision 
3096         'ji': 'yid',  # Replaced by yi in 1989 revision 
3104     def short2long(cls, code): 
3105         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
3106         return cls._lang_map.get(code[:2]) 
3109     def long2short(cls, code): 
3110         """Convert language code from ISO 639-2/T to ISO 639-1""" 
3111         for short_name, long_name in cls._lang_map.items(): 
3112             if long_name == code: 
3116 class ISO3166Utils(object): 
3117     # From http://data.okfn.org/data/core/country-list 
3119         'AF': 'Afghanistan', 
3120         'AX': 'Åland Islands', 
3123         'AS': 'American Samoa', 
3128         'AG': 'Antigua and Barbuda', 
3145         'BO': 'Bolivia, Plurinational State of', 
3146         'BQ': 'Bonaire, Sint Eustatius and Saba', 
3147         'BA': 'Bosnia and Herzegovina', 
3149         'BV': 'Bouvet Island', 
3151         'IO': 'British Indian Ocean Territory', 
3152         'BN': 'Brunei Darussalam', 
3154         'BF': 'Burkina Faso', 
3160         'KY': 'Cayman Islands', 
3161         'CF': 'Central African Republic', 
3165         'CX': 'Christmas Island', 
3166         'CC': 'Cocos (Keeling) Islands', 
3170         'CD': 'Congo, the Democratic Republic of the', 
3171         'CK': 'Cook Islands', 
3173         'CI': 'Côte d\'Ivoire', 
3178         'CZ': 'Czech Republic', 
3182         'DO': 'Dominican Republic', 
3185         'SV': 'El Salvador', 
3186         'GQ': 'Equatorial Guinea', 
3190         'FK': 'Falkland Islands (Malvinas)', 
3191         'FO': 'Faroe Islands', 
3195         'GF': 'French Guiana', 
3196         'PF': 'French Polynesia', 
3197         'TF': 'French Southern Territories', 
3212         'GW': 'Guinea-Bissau', 
3215         'HM': 'Heard Island and McDonald Islands', 
3216         'VA': 'Holy See (Vatican City State)', 
3223         'IR': 'Iran, Islamic Republic of', 
3226         'IM': 'Isle of Man', 
3236         'KP': 'Korea, Democratic People\'s Republic of', 
3237         'KR': 'Korea, Republic of', 
3240         'LA': 'Lao People\'s Democratic Republic', 
3246         'LI': 'Liechtenstein', 
3250         'MK': 'Macedonia, the Former Yugoslav Republic of', 
3257         'MH': 'Marshall Islands', 
3263         'FM': 'Micronesia, Federated States of', 
3264         'MD': 'Moldova, Republic of', 
3275         'NL': 'Netherlands', 
3276         'NC': 'New Caledonia', 
3277         'NZ': 'New Zealand', 
3282         'NF': 'Norfolk Island', 
3283         'MP': 'Northern Mariana Islands', 
3288         'PS': 'Palestine, State of', 
3290         'PG': 'Papua New Guinea', 
3293         'PH': 'Philippines', 
3297         'PR': 'Puerto Rico', 
3301         'RU': 'Russian Federation', 
3303         'BL': 'Saint Barthélemy', 
3304         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
3305         'KN': 'Saint Kitts and Nevis', 
3306         'LC': 'Saint Lucia', 
3307         'MF': 'Saint Martin (French part)', 
3308         'PM': 'Saint Pierre and Miquelon', 
3309         'VC': 'Saint Vincent and the Grenadines', 
3312         'ST': 'Sao Tome and Principe', 
3313         'SA': 'Saudi Arabia', 
3317         'SL': 'Sierra Leone', 
3319         'SX': 'Sint Maarten (Dutch part)', 
3322         'SB': 'Solomon Islands', 
3324         'ZA': 'South Africa', 
3325         'GS': 'South Georgia and the South Sandwich Islands', 
3326         'SS': 'South Sudan', 
3331         'SJ': 'Svalbard and Jan Mayen', 
3334         'CH': 'Switzerland', 
3335         'SY': 'Syrian Arab Republic', 
3336         'TW': 'Taiwan, Province of China', 
3338         'TZ': 'Tanzania, United Republic of', 
3340         'TL': 'Timor-Leste', 
3344         'TT': 'Trinidad and Tobago', 
3347         'TM': 'Turkmenistan', 
3348         'TC': 'Turks and Caicos Islands', 
3352         'AE': 'United Arab Emirates', 
3353         'GB': 'United Kingdom', 
3354         'US': 'United States', 
3355         'UM': 'United States Minor Outlying Islands', 
3359         'VE': 'Venezuela, Bolivarian Republic of', 
3361         'VG': 'Virgin Islands, British', 
3362         'VI': 'Virgin Islands, U.S.', 
3363         'WF': 'Wallis and Futuna', 
3364         'EH': 'Western Sahara', 
3371     def short2full(cls, code): 
3372         """Convert an ISO 3166-2 country code to the corresponding full name""" 
3373         return cls._country_map.get(code.upper()) 
3376 class GeoUtils(object): 
3377     # Major IPv4 address blocks per country 
3379         'AD': '85.94.160.0/19', 
3380         'AE': '94.200.0.0/13', 
3381         'AF': '149.54.0.0/17', 
3382         'AG': '209.59.64.0/18', 
3383         'AI': '204.14.248.0/21', 
3384         'AL': '46.99.0.0/16', 
3385         'AM': '46.70.0.0/15', 
3386         'AO': '105.168.0.0/13', 
3387         'AP': '159.117.192.0/21', 
3388         'AR': '181.0.0.0/12', 
3389         'AS': '202.70.112.0/20', 
3390         'AT': '84.112.0.0/13', 
3391         'AU': '1.128.0.0/11', 
3392         'AW': '181.41.0.0/18', 
3393         'AZ': '5.191.0.0/16', 
3394         'BA': '31.176.128.0/17', 
3395         'BB': '65.48.128.0/17', 
3396         'BD': '114.130.0.0/16', 
3398         'BF': '129.45.128.0/17', 
3399         'BG': '95.42.0.0/15', 
3400         'BH': '37.131.0.0/17', 
3401         'BI': '154.117.192.0/18', 
3402         'BJ': '137.255.0.0/16', 
3403         'BL': '192.131.134.0/24', 
3404         'BM': '196.12.64.0/18', 
3405         'BN': '156.31.0.0/16', 
3406         'BO': '161.56.0.0/16', 
3407         'BQ': '161.0.80.0/20', 
3408         'BR': '152.240.0.0/12', 
3409         'BS': '24.51.64.0/18', 
3410         'BT': '119.2.96.0/19', 
3411         'BW': '168.167.0.0/16', 
3412         'BY': '178.120.0.0/13', 
3413         'BZ': '179.42.192.0/18', 
3414         'CA': '99.224.0.0/11', 
3415         'CD': '41.243.0.0/16', 
3416         'CF': '196.32.200.0/21', 
3417         'CG': '197.214.128.0/17', 
3418         'CH': '85.0.0.0/13', 
3419         'CI': '154.232.0.0/14', 
3420         'CK': '202.65.32.0/19', 
3421         'CL': '152.172.0.0/14', 
3422         'CM': '165.210.0.0/15', 
3423         'CN': '36.128.0.0/10', 
3424         'CO': '181.240.0.0/12', 
3425         'CR': '201.192.0.0/12', 
3426         'CU': '152.206.0.0/15', 
3427         'CV': '165.90.96.0/19', 
3428         'CW': '190.88.128.0/17', 
3429         'CY': '46.198.0.0/15', 
3430         'CZ': '88.100.0.0/14', 
3432         'DJ': '197.241.0.0/17', 
3433         'DK': '87.48.0.0/12', 
3434         'DM': '192.243.48.0/20', 
3435         'DO': '152.166.0.0/15', 
3436         'DZ': '41.96.0.0/12', 
3437         'EC': '186.68.0.0/15', 
3438         'EE': '90.190.0.0/15', 
3439         'EG': '156.160.0.0/11', 
3440         'ER': '196.200.96.0/20', 
3441         'ES': '88.0.0.0/11', 
3442         'ET': '196.188.0.0/14', 
3443         'EU': '2.16.0.0/13', 
3444         'FI': '91.152.0.0/13', 
3445         'FJ': '144.120.0.0/16', 
3446         'FM': '119.252.112.0/20', 
3447         'FO': '88.85.32.0/19', 
3449         'GA': '41.158.0.0/15', 
3451         'GD': '74.122.88.0/21', 
3452         'GE': '31.146.0.0/16', 
3453         'GF': '161.22.64.0/18', 
3454         'GG': '62.68.160.0/19', 
3455         'GH': '45.208.0.0/14', 
3456         'GI': '85.115.128.0/19', 
3457         'GL': '88.83.0.0/19', 
3458         'GM': '160.182.0.0/15', 
3459         'GN': '197.149.192.0/18', 
3460         'GP': '104.250.0.0/19', 
3461         'GQ': '105.235.224.0/20', 
3462         'GR': '94.64.0.0/13', 
3463         'GT': '168.234.0.0/16', 
3464         'GU': '168.123.0.0/16', 
3465         'GW': '197.214.80.0/20', 
3466         'GY': '181.41.64.0/18', 
3467         'HK': '113.252.0.0/14', 
3468         'HN': '181.210.0.0/16', 
3469         'HR': '93.136.0.0/13', 
3470         'HT': '148.102.128.0/17', 
3471         'HU': '84.0.0.0/14', 
3472         'ID': '39.192.0.0/10', 
3473         'IE': '87.32.0.0/12', 
3474         'IL': '79.176.0.0/13', 
3475         'IM': '5.62.80.0/20', 
3476         'IN': '117.192.0.0/10', 
3477         'IO': '203.83.48.0/21', 
3478         'IQ': '37.236.0.0/14', 
3479         'IR': '2.176.0.0/12', 
3480         'IS': '82.221.0.0/16', 
3481         'IT': '79.0.0.0/10', 
3482         'JE': '87.244.64.0/18', 
3483         'JM': '72.27.0.0/17', 
3484         'JO': '176.29.0.0/16', 
3485         'JP': '126.0.0.0/8', 
3486         'KE': '105.48.0.0/12', 
3487         'KG': '158.181.128.0/17', 
3488         'KH': '36.37.128.0/17', 
3489         'KI': '103.25.140.0/22', 
3490         'KM': '197.255.224.0/20', 
3491         'KN': '198.32.32.0/19', 
3492         'KP': '175.45.176.0/22', 
3493         'KR': '175.192.0.0/10', 
3494         'KW': '37.36.0.0/14', 
3495         'KY': '64.96.0.0/15', 
3496         'KZ': '2.72.0.0/13', 
3497         'LA': '115.84.64.0/18', 
3498         'LB': '178.135.0.0/16', 
3499         'LC': '192.147.231.0/24', 
3500         'LI': '82.117.0.0/19', 
3501         'LK': '112.134.0.0/15', 
3502         'LR': '41.86.0.0/19', 
3503         'LS': '129.232.0.0/17', 
3504         'LT': '78.56.0.0/13', 
3505         'LU': '188.42.0.0/16', 
3506         'LV': '46.109.0.0/16', 
3507         'LY': '41.252.0.0/14', 
3508         'MA': '105.128.0.0/11', 
3509         'MC': '88.209.64.0/18', 
3510         'MD': '37.246.0.0/16', 
3511         'ME': '178.175.0.0/17', 
3512         'MF': '74.112.232.0/21', 
3513         'MG': '154.126.0.0/17', 
3514         'MH': '117.103.88.0/21', 
3515         'MK': '77.28.0.0/15', 
3516         'ML': '154.118.128.0/18', 
3517         'MM': '37.111.0.0/17', 
3518         'MN': '49.0.128.0/17', 
3519         'MO': '60.246.0.0/16', 
3520         'MP': '202.88.64.0/20', 
3521         'MQ': '109.203.224.0/19', 
3522         'MR': '41.188.64.0/18', 
3523         'MS': '208.90.112.0/22', 
3524         'MT': '46.11.0.0/16', 
3525         'MU': '105.16.0.0/12', 
3526         'MV': '27.114.128.0/18', 
3527         'MW': '105.234.0.0/16', 
3528         'MX': '187.192.0.0/11', 
3529         'MY': '175.136.0.0/13', 
3530         'MZ': '197.218.0.0/15', 
3531         'NA': '41.182.0.0/16', 
3532         'NC': '101.101.0.0/18', 
3533         'NE': '197.214.0.0/18', 
3534         'NF': '203.17.240.0/22', 
3535         'NG': '105.112.0.0/12', 
3536         'NI': '186.76.0.0/15', 
3537         'NL': '145.96.0.0/11', 
3538         'NO': '84.208.0.0/13', 
3539         'NP': '36.252.0.0/15', 
3540         'NR': '203.98.224.0/19', 
3541         'NU': '49.156.48.0/22', 
3542         'NZ': '49.224.0.0/14', 
3543         'OM': '5.36.0.0/15', 
3544         'PA': '186.72.0.0/15', 
3545         'PE': '186.160.0.0/14', 
3546         'PF': '123.50.64.0/18', 
3547         'PG': '124.240.192.0/19', 
3548         'PH': '49.144.0.0/13', 
3549         'PK': '39.32.0.0/11', 
3550         'PL': '83.0.0.0/11', 
3551         'PM': '70.36.0.0/20', 
3552         'PR': '66.50.0.0/16', 
3553         'PS': '188.161.0.0/16', 
3554         'PT': '85.240.0.0/13', 
3555         'PW': '202.124.224.0/20', 
3556         'PY': '181.120.0.0/14', 
3557         'QA': '37.210.0.0/15', 
3558         'RE': '139.26.0.0/16', 
3559         'RO': '79.112.0.0/13', 
3560         'RS': '178.220.0.0/14', 
3561         'RU': '5.136.0.0/13', 
3562         'RW': '105.178.0.0/15', 
3563         'SA': '188.48.0.0/13', 
3564         'SB': '202.1.160.0/19', 
3565         'SC': '154.192.0.0/11', 
3566         'SD': '154.96.0.0/13', 
3567         'SE': '78.64.0.0/12', 
3568         'SG': '152.56.0.0/14', 
3569         'SI': '188.196.0.0/14', 
3570         'SK': '78.98.0.0/15', 
3571         'SL': '197.215.0.0/17', 
3572         'SM': '89.186.32.0/19', 
3573         'SN': '41.82.0.0/15', 
3574         'SO': '197.220.64.0/19', 
3575         'SR': '186.179.128.0/17', 
3576         'SS': '105.235.208.0/21', 
3577         'ST': '197.159.160.0/19', 
3578         'SV': '168.243.0.0/16', 
3579         'SX': '190.102.0.0/20', 
3581         'SZ': '41.84.224.0/19', 
3582         'TC': '65.255.48.0/20', 
3583         'TD': '154.68.128.0/19', 
3584         'TG': '196.168.0.0/14', 
3585         'TH': '171.96.0.0/13', 
3586         'TJ': '85.9.128.0/18', 
3587         'TK': '27.96.24.0/21', 
3588         'TL': '180.189.160.0/20', 
3589         'TM': '95.85.96.0/19', 
3590         'TN': '197.0.0.0/11', 
3591         'TO': '175.176.144.0/21', 
3592         'TR': '78.160.0.0/11', 
3593         'TT': '186.44.0.0/15', 
3594         'TV': '202.2.96.0/19', 
3595         'TW': '120.96.0.0/11', 
3596         'TZ': '156.156.0.0/14', 
3597         'UA': '93.72.0.0/13', 
3598         'UG': '154.224.0.0/13', 
3600         'UY': '167.56.0.0/13', 
3601         'UZ': '82.215.64.0/18', 
3602         'VA': '212.77.0.0/19', 
3603         'VC': '24.92.144.0/20', 
3604         'VE': '186.88.0.0/13', 
3605         'VG': '172.103.64.0/18', 
3606         'VI': '146.226.0.0/16', 
3607         'VN': '14.160.0.0/11', 
3608         'VU': '202.80.32.0/20', 
3609         'WF': '117.20.32.0/21', 
3610         'WS': '202.4.32.0/19', 
3611         'YE': '134.35.0.0/16', 
3612         'YT': '41.242.116.0/22', 
3613         'ZA': '41.0.0.0/11', 
3614         'ZM': '165.56.0.0/13', 
3615         'ZW': '41.85.192.0/19', 
def random_ipv4(cls, code_or_block):
    """Return a random IPv4 address (as a str) from the given pool.

    code_or_block: either a two-letter country code (looked up in
    cls._country_ip_map) or a CIDR block string like '10.0.0.0/8'.
    Returns None when a country code has no known block.
    """
    if len(code_or_block) == 2:
        # Two characters: treat it as an ISO country code.
        block = cls._country_ip_map.get(code_or_block.upper())
        if not block:
            return None
    else:
        block = code_or_block
    addr, preflen = block.split('/')
    # Network base address as a 32-bit integer; the host part is the
    # low (32 - preflen) bits.
    addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
    addr_max = addr_min | (0xffffffff >> int(preflen))
    return compat_str(socket.inet_ntoa(
        compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header.

    The header value overrides the handler-level proxy for that request;
    the special value '__noproxy__' disables proxying, and socks* proxy
    URLs are forwarded via the 'Ytdl-socks-proxy' header so that the
    http/https handlers can wrap the socket themselves.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            # The per-request proxy overrides whatever was configured
            # globally; consume the internal header so it is never sent.
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
3658 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 
3659 # released into Public Domain 
3660 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    # Emit 32 bits at a time, most significant chunk first.
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes.  this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    if length % 4:
        # Zero-pad on the left so the length is a multiple of 4 and the
        # string can be consumed in big-endian 32-bit chunks.
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Reverse the bytes (little-endian interpretation), read them as one
    # big integer, then apply textbook RSA: payload^e mod N.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data

    Raises ValueError when data does not fit into length - 11 bytes.
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Block format: 00 || 02 || PS || 00 || data.  RFC 8017 (PKCS#1 v1.5)
    # requires the padding string PS to consist of NONZERO pseudo-random
    # octets, otherwise a stray zero makes the 00 separator ambiguous on
    # decryption -- hence randint(1, 255), not randint(0, 254).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
 
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer num in base n.

    When table is omitted, digits come from 0-9a-zA-Z (so n <= 62);
    a custom digit table may be supplied for other alphabets.
    Raises ValueError when n exceeds the table length.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    ret = ''
    # Peel off the least significant digit each iteration and prepend it.
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
 
def decode_packed_codes(code):
    """Unpack JavaScript "p,a,c,k,e,d"-style obfuscated code.

    Parses the packed payload with PACKED_CODES_RE, rebuilds the
    symbol table (index encoded in the given base -> replacement word)
    and substitutes every word token back into the obfuscated source.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfucasted_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    # Map each base-n-encoded index to its symbol; an empty symbol means
    # the word stands for itself.
    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfucasted_code)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list string into a dict.

    Values may be double-quoted (quotes are stripped; quoted values may
    contain commas) or bare (terminated by a comma or end of string).
    """
    info = {}
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator).

    A negative val is first reinterpreted as its two's-complement 32-bit
    unsigned equivalent, then shifted right by n bits.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
 
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into (width, height, pixels).

    pixels is a list of rows; each row is a flat list of byte values,
    3 per pixel (R, G, B) -- note the 3-byte stride assumes 8-bit RGB
    (colour type 2) images.  Raises IOError on a bad signature or when
    no IDAT chunk is present.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split over several IDAT chunks; concatenate them.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-decoded byte by its flat (row-major) index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed with one filter-type byte.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours used by the filters: same channel of the pixel
            # to the left (3 bytes back) and the pixel above.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the neighbour closest to the Paeth predictor.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute key = value (bytes) on the file at path.

    Tries, in order: the pyxattr/xattr Python modules; NTFS Alternate
    Data Streams on Windows; the setfattr/xattr command-line tools.
    Raises XAttrUnavailableError when no usable backend exists and
    XAttrMetadataError when the chosen backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
3981 def random_birthday(year_field
, month_field
, day_field
): 
3982     start_date 
= datetime
.date(1950, 1, 1) 
3983     end_date 
= datetime
.date(1995, 12, 31) 
3984     offset 
= random
.randint(0, (end_date 
- start_date
).days
) 
3985     random_date 
= start_date 
+ datetime
.timedelta(offset
) 
3987         year_field
: str(random_date
.year
), 
3988         month_field
: str(random_date
.month
), 
3989         day_field
: str(random_date
.day
),