4 from __future__ 
import unicode_literals
 
  34 import xml
.etree
.ElementTree
 
  41     compat_etree_fromstring
, 
  43     compat_html_entities_html5
, 
  49     compat_socket_create_connection
, 
  55     compat_urllib_parse_urlencode
, 
  56     compat_urllib_parse_urlparse
, 
  57     compat_urllib_parse_unquote_plus
, 
  58     compat_urllib_request
, 
  69 def register_socks_protocols(): 
  70     # "Register" SOCKS protocols 
  71     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  72     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  73     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  74         if scheme 
not in compat_urlparse
.uses_netloc
: 
  75             compat_urlparse
.uses_netloc
.append(scheme
) 
  78 # This is not clearly defined otherwise 
  79 compiled_regex_type 
= type(re
.compile('')) 
  82     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 
  83     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  84     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  85     'Accept-Encoding': 'gzip, deflate', 
  86     'Accept-Language': 'en-us,en;q=0.5', 
  91     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
  97 ENGLISH_MONTH_NAMES 
= [ 
  98     'January', 'February', 'March', 'April', 'May', 'June', 
  99     'July', 'August', 'September', 'October', 'November', 'December'] 
 102     'en': ENGLISH_MONTH_NAMES
, 
 104         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
 105         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 109     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 110     'flv', 'f4v', 'f4a', 'f4b', 
 111     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 112     'mkv', 'mka', 'mk3d', 
 121     'f4f', 'f4m', 'm3u8', 'smil') 
 123 # needed for sanitizing filenames in restricted mode 
 124 ACCENT_CHARS 
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', 
 125                         itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], 
 126                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) 
 149     '%Y-%m-%d %H:%M:%S.%f', 
 152     '%Y-%m-%dT%H:%M:%SZ', 
 153     '%Y-%m-%dT%H:%M:%S.%fZ', 
 154     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 156     '%Y-%m-%dT%H:%M:%S.%f', 
 159     '%b %d %Y at %H:%M:%S', 
 162 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 163 DATE_FORMATS_DAY_FIRST
.extend([ 
 172 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 173 DATE_FORMATS_MONTH_FIRST
.extend([ 
 181 PACKED_CODES_RE 
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" 
 184 def preferredencoding(): 
 185     """Get preferred encoding. 
 187     Returns the best encoding scheme for the system, based on 
 188     locale.getpreferredencoding() and some further tweaks. 
 191         pref 
= locale
.getpreferredencoding() 
 199 def write_json_file(obj
, fn
): 
 200     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 202     fn 
= encodeFilename(fn
) 
 203     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 204         encoding 
= get_filesystem_encoding() 
 205         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 206         # will fail if the filename contains non ascii characters unless we 
 207         # use a unicode object 
 208         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 209         # the same for os.path.dirname 
 210         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 212         path_basename 
= os
.path
.basename
 
 213         path_dirname 
= os
.path
.dirname
 
 217         'prefix': path_basename(fn
) + '.', 
 218         'dir': path_dirname(fn
), 
 222     # In Python 2.x, json.dump expects a bytestream. 
 223     # In Python 3.x, it writes to a character stream 
 224     if sys
.version_info 
< (3, 0): 
 232     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 237         if sys
.platform 
== 'win32': 
 238             # Need to remove existing file on Windows, else os.rename raises 
 239             # WindowsError or FileExistsError. 
 244         os
.rename(tf
.name
, fn
) 
 253 if sys
.version_info 
>= (2, 7): 
 254     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 255         """ Find the xpath xpath[@key=val] """ 
 256         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 257         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 258         return node
.find(expr
) 
 260     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 261         for f 
in node
.findall(compat_xpath(xpath
)): 
 262             if key 
not in f
.attrib
: 
 264             if val 
is None or f
.attrib
.get(key
) == val
: 
 268 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 269 # the namespace parameter 
 272 def xpath_with_ns(path
, ns_map
): 
 273     components 
= [c
.split(':') for c 
in path
.split('/')] 
 277             replaced
.append(c
[0]) 
 280             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 281     return '/'.join(replaced
) 
 284 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 285     def _find_xpath(xpath
): 
 286         return node
.find(compat_xpath(xpath
)) 
 288     if isinstance(xpath
, (str, compat_str
)): 
 289         n 
= _find_xpath(xpath
) 
 297         if default 
is not NO_DEFAULT
: 
 300             name 
= xpath 
if name 
is None else name
 
 301             raise ExtractorError('Could not find XML element %s' % name
) 
 307 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 308     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 309     if n 
is None or n 
== default
: 
 312         if default 
is not NO_DEFAULT
: 
 315             name 
= xpath 
if name 
is None else name
 
 316             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 322 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 323     n 
= find_xpath_attr(node
, xpath
, key
) 
 325         if default 
is not NO_DEFAULT
: 
 328             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 329             raise ExtractorError('Could not find XML attribute %s' % name
) 
 335 def get_element_by_id(id, html
): 
 336     """Return the content of the tag with the specified ID in the passed HTML document""" 
 337     return get_element_by_attribute('id', id, html
) 
 340 def get_element_by_class(class_name
, html
): 
 341     """Return the content of the first tag with the specified class in the passed HTML document""" 
 342     retval 
= get_elements_by_class(class_name
, html
) 
 343     return retval
[0] if retval 
else None 
 346 def get_element_by_attribute(attribute
, value
, html
, escape_value
=True): 
 347     retval 
= get_elements_by_attribute(attribute
, value
, html
, escape_value
) 
 348     return retval
[0] if retval 
else None 
 351 def get_elements_by_class(class_name
, html
): 
 352     """Return the content of all tags with the specified class in the passed HTML document as a list""" 
 353     return get_elements_by_attribute( 
 354         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 355         html, escape_value=False) 
 358 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
 359     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 361     value = re.escape(value) if escape_value else value 
 364     for m in re.finditer(r'''(?xs) 
 366          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'))*?
 
 368          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'))*? 
 372     ''' % (re.escape(attribute), value), html): 
 373         res = m.group('content
') 
 375         if res.startswith('"') or res.startswith("'"): 
 378         retlist.append(unescapeHTML(res)) 
 383 class HTMLAttributeParser(compat_HTMLParser): 
 384     """Trivial HTML parser to gather the attributes for a single element""" 
 387         compat_HTMLParser.__init__(self) 
 389     def handle_starttag(self, tag, attrs): 
 390         self.attrs = dict(attrs) 
 393 def extract_attributes(html_element): 
 394     """Given a string for an HTML element such as 
 396          a="foo" B="bar" c="&98;az" d=boz 
 397          empty= noval entity="&" 
 400     Decode and return a dictionary of attributes. 
 402         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 403         'empty
': '', 'noval
': None, 'entity
': '&', 
 404         'sq
': '"', 'dq': '\'' 
 406     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 407     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 409     parser = HTMLAttributeParser() 
 410     parser.feed(html_element) 
 415 def clean_html(html): 
 416     """Clean an HTML snippet into a readable string""" 
 418     if html is None:  # Convenience for sanitizing descriptions etc. 
 422     html = html.replace('\n', ' ') 
 423     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 424     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 426     html = re.sub('<.*?>', '', html) 
 427     # Replace html entities 
 428     html = unescapeHTML(html) 
 432 def sanitize_open(filename, open_mode): 
 433     """Try to open the given filename, and slightly tweak it if this fails. 
 435     Attempts to open the given filename. If this fails, it tries to change 
 436     the filename slightly, step by step, until it's either able to open it 
 437     or it fails and raises a final exception, like the standard open() 
 440     It returns the tuple (stream, definitive_file_name). 
 444             if sys.platform == 'win32': 
 446                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 447             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 448         stream = open(encodeFilename(filename), open_mode) 
 449         return (stream, filename) 
 450     except (IOError, OSError) as err: 
 451         if err.errno in (errno.EACCES,): 
 454         # In case of error, try to remove win32 forbidden chars 
 455         alt_filename = sanitize_path(filename) 
 456         if alt_filename == filename: 
 459             # An exception here should be caught in the caller 
 460             stream = open(encodeFilename(alt_filename), open_mode) 
 461             return (stream, alt_filename) 
 464 def timeconvert(timestr): 
 465     """Convert RFC 2822 defined time string into system timestamp""" 
 467     timetuple = email.utils.parsedate_tz(timestr) 
 468     if timetuple is not None: 
 469         timestamp = email.utils.mktime_tz(timetuple) 
 473 def sanitize_filename(s, restricted=False, is_id=False): 
 474     """Sanitizes a string so it could be used as part of a filename. 
 475     If restricted is set, use a stricter subset of allowed characters. 
 476     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible 
 478     def replace_insane(char): 
 479         if restricted and char in ACCENT_CHARS: 
 480             return ACCENT_CHARS[char] 
 481         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 484             return '' if restricted else '\'' 
 486             return '_
-' if restricted else ' -' 
 487         elif char in '\\/|
*<>': 
 489         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 491         if restricted 
and ord(char
) > 127: 
 496     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 497     result 
= ''.join(map(replace_insane
, s
)) 
 499         while '__' in result
: 
 500             result 
= result
.replace('__', '_') 
 501         result 
= result
.strip('_') 
 502         # Common case of "Foreign band name - English song title" 
 503         if restricted 
and result
.startswith('-_'): 
 505         if result
.startswith('-'): 
 506             result 
= '_' + result
[len('-'):] 
 507         result 
= result
.lstrip('.') 
 513 def sanitize_path(s
): 
 514     """Sanitizes and normalizes path on Windows""" 
 515     if sys
.platform 
!= 'win32': 
 517     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 518     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 519         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 520     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 524         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
 525         for path_part 
in norm_path
] 
 527         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 528     return os
.path
.join(*sanitized_path
) 
 531 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of 
 532 # unwanted failures due to missing protocol 
 533 def sanitize_url(url
): 
 534     return 'http:%s' % url 
if url
.startswith('//') else url
 
 537 def sanitized_Request(url
, *args
, **kwargs
): 
 538     return compat_urllib_request
.Request(sanitize_url(url
), *args
, **kwargs
) 
 541 def orderedSet(iterable
): 
 542     """ Remove all duplicates from the input iterable """ 
 550 def _htmlentity_transform(entity_with_semicolon
): 
 551     """Transforms an HTML entity to a character.""" 
 552     entity 
= entity_with_semicolon
[:-1] 
 554     # Known non-numeric HTML entity 
 555     if entity 
in compat_html_entities
.name2codepoint
: 
 556         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 558     # TODO: HTML5 allows entities without a semicolon. For example, 
 559     # 'Éric' should be decoded as 'Éric'. 
 560     if entity_with_semicolon 
in compat_html_entities_html5
: 
 561         return compat_html_entities_html5
[entity_with_semicolon
] 
 563     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 565         numstr 
= mobj
.group(1) 
 566         if numstr
.startswith('x'): 
 568             numstr 
= '0%s' % numstr
 
 571         # See https://github.com/rg3/youtube-dl/issues/7518 
 573             return compat_chr(int(numstr
, base
)) 
 577     # Unknown entity in name, return its literal representation 
 578     return '&%s;' % entity
 
 584     assert type(s
) == compat_str
 
 587         r
'&([^;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 590 def get_subprocess_encoding(): 
 591     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 592         # For subprocess calls, encode with locale encoding 
 593         # Refer to http://stackoverflow.com/a/9951851/35070 
 594         encoding 
= preferredencoding() 
 596         encoding 
= sys
.getfilesystemencoding() 
 602 def encodeFilename(s
, for_subprocess
=False): 
 604     @param s The name of the file 
 607     assert type(s
) == compat_str
 
 609     # Python 3 has a Unicode API 
 610     if sys
.version_info 
>= (3, 0): 
 613     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 614     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 615     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 616     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 619     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 620     if sys
.platform
.startswith('java'): 
 623     return s
.encode(get_subprocess_encoding(), 'ignore') 
 626 def decodeFilename(b
, for_subprocess
=False): 
 628     if sys
.version_info 
>= (3, 0): 
 631     if not isinstance(b
, bytes): 
 634     return b
.decode(get_subprocess_encoding(), 'ignore') 
 637 def encodeArgument(s
): 
 638     if not isinstance(s
, compat_str
): 
 639         # Legacy code that uses byte strings 
 640         # Uncomment the following line after fixing all post processors 
 641         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 642         s 
= s
.decode('ascii') 
 643     return encodeFilename(s
, True) 
 646 def decodeArgument(b
): 
 647     return decodeFilename(b
, True) 
 650 def decodeOption(optval
): 
 653     if isinstance(optval
, bytes): 
 654         optval 
= optval
.decode(preferredencoding()) 
 656     assert isinstance(optval
, compat_str
) 
 660 def formatSeconds(secs
): 
 662         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 664         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 669 def make_HTTPS_handler(params
, **kwargs
): 
 670     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 671     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 672         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 673         if opts_no_check_certificate
: 
 674             context
.check_hostname 
= False 
 675             context
.verify_mode 
= ssl
.CERT_NONE
 
 677             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 680             # (create_default_context present but HTTPSHandler has no context=) 
 683     if sys
.version_info 
< (3, 2): 
 684         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 686         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 687         context
.verify_mode 
= (ssl
.CERT_NONE
 
 688                                if opts_no_check_certificate
 
 689                                else ssl
.CERT_REQUIRED
) 
 690         context
.set_default_verify_paths() 
 691         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 694 def bug_reports_message(): 
 695     if ytdl_is_updateable(): 
 696         update_cmd 
= 'type  youtube-dl -U  to update' 
 698         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 699     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 700     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 701     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
 705 class YoutubeDLError(Exception): 
 706     """Base exception for YoutubeDL errors.""" 
 710 class ExtractorError(YoutubeDLError
): 
 711     """Error during info extraction.""" 
 713     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 714         """ tb, if given, is the original traceback (so that it can be printed out). 
 715         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 718         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 720         if video_id 
is not None: 
 721             msg 
= video_id 
+ ': ' + msg
 
 723             msg 
+= ' (caused by %r)' % cause
 
 725             msg 
+= bug_reports_message() 
 726         super(ExtractorError
, self
).__init
__(msg
) 
 729         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 731         self
.video_id 
= video_id
 
 733     def format_traceback(self
): 
 734         if self
.traceback 
is None: 
 736         return ''.join(traceback
.format_tb(self
.traceback
)) 
 739 class UnsupportedError(ExtractorError
): 
 740     def __init__(self
, url
): 
 741         super(UnsupportedError
, self
).__init
__( 
 742             'Unsupported URL: %s' % url
, expected
=True) 
 746 class RegexNotFoundError(ExtractorError
): 
 747     """Error when a regex didn't match""" 
 751 class GeoRestrictedError(ExtractorError
): 
 752     """Geographic restriction Error exception. 
 754     This exception may be thrown when a video is not available from your 
 755     geographic location due to geographic restrictions imposed by a website. 
 757     def __init__(self
, msg
, countries
=None): 
 758         super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True) 
 760         self
.countries 
= countries
 
 763 class DownloadError(YoutubeDLError
): 
 764     """Download Error exception. 
 766     This exception may be thrown by FileDownloader objects if they are not 
 767     configured to continue on errors. They will contain the appropriate 
 771     def __init__(self
, msg
, exc_info
=None): 
 772         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ 
 773         super(DownloadError
, self
).__init
__(msg
) 
 774         self
.exc_info 
= exc_info
 
 777 class SameFileError(YoutubeDLError
): 
 778     """Same File exception. 
 780     This exception will be thrown by FileDownloader objects if they detect 
 781     multiple files would have to be downloaded to the same file on disk. 
 786 class PostProcessingError(YoutubeDLError
): 
 787     """Post Processing exception. 
 789     This exception may be raised by PostProcessor's .run() method to 
 790     indicate an error in the postprocessing task. 
 793     def __init__(self
, msg
): 
 794         super(PostProcessingError
, self
).__init
__(msg
) 
 798 class MaxDownloadsReached(YoutubeDLError
): 
 799     """ --max-downloads limit has been reached. """ 
 803 class UnavailableVideoError(YoutubeDLError
): 
 804     """Unavailable Format exception. 
 806     This exception will be thrown when a video is requested 
 807     in a format that is not available for that video. 
 812 class ContentTooShortError(YoutubeDLError
): 
 813     """Content Too Short exception. 
 815     This exception may be raised by FileDownloader objects when a file they 
 816     download is too small for what the server announced first, indicating 
 817     the connection was probably interrupted. 
 820     def __init__(self
, downloaded
, expected
): 
 821         super(ContentTooShortError
, self
).__init
__( 
 822             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
) 
 825         self
.downloaded 
= downloaded
 
 826         self
.expected 
= expected
 
 829 class XAttrMetadataError(YoutubeDLError
): 
 830     def __init__(self
, code
=None, msg
='Unknown error'): 
 831         super(XAttrMetadataError
, self
).__init
__(msg
) 
 835         # Parsing code and msg 
 836         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 837                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 838             self
.reason 
= 'NO_SPACE' 
 839         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 840             self
.reason 
= 'VALUE_TOO_LONG' 
 842             self
.reason 
= 'NOT_SUPPORTED' 
 845 class XAttrUnavailableError(YoutubeDLError
): 
 849 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 850     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 851     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 852     # https://github.com/rg3/youtube-dl/issues/6727) 
 853     if sys
.version_info 
< (3, 0): 
 854         kwargs
[b
'strict'] = True 
 855     hc 
= http_class(*args
, **kwargs
) 
 856     source_address 
= ydl_handler
._params
.get('source_address') 
 857     if source_address 
is not None: 
 858         sa 
= (source_address
, 0) 
 859         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 860             hc
.source_address 
= sa
 
 862             def _hc_connect(self
, *args
, **kwargs
): 
 863                 sock 
= compat_socket_create_connection( 
 864                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 866                     self
.sock 
= ssl
.wrap_socket( 
 867                         sock
, self
.key_file
, self
.cert_file
, 
 868                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 871             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 876 def handle_youtubedl_headers(headers
): 
 877     filtered_headers 
= headers
 
 879     if 'Youtubedl-no-compression' in filtered_headers
: 
 880         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 881         del filtered_headers
['Youtubedl-no-compression'] 
 883     return filtered_headers
 
 886 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 887     """Handler for HTTP requests and responses. 
 889     This class, when installed with an OpenerDirector, automatically adds 
 890     the standard headers to every HTTP request and handles gzipped and 
 891     deflated responses from web servers. If compression is to be avoided in 
 892     a particular request, the original request in the program code only has 
 893     to include the HTTP header "Youtubedl-no-compression", which will be 
 894     removed before making the real request. 
 896     Part of this code was copied from: 
 898     http://techknack.net/python-urllib2-handlers/ 
 900     Andrew Rowls, the author of that code, agreed to release it to the 
 904     def __init__(self
, params
, *args
, **kwargs
): 
 905         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 906         self
._params 
= params
 
 908     def http_open(self
, req
): 
 909         conn_class 
= compat_http_client
.HTTPConnection
 
 911         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 913             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 914             del req
.headers
['Ytdl-socks-proxy'] 
 916         return self
.do_open(functools
.partial( 
 917             _create_http_connection
, self
, conn_class
, False), 
 923             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 925             return zlib
.decompress(data
) 
 928     def addinfourl_wrapper(stream
, headers
, url
, code
): 
 929         if hasattr(compat_urllib_request
.addinfourl
, 'getcode'): 
 930             return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
) 
 931         ret 
= compat_urllib_request
.addinfourl(stream
, headers
, url
) 
 935     def http_request(self
, req
): 
 936         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 937         # always respected by websites, some tend to give out URLs with non percent-encoded 
 938         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 939         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 940         # To work around aforementioned issue we will replace request's original URL with 
 941         # percent-encoded one 
 942         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 943         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 944         url 
= req
.get_full_url() 
 945         url_escaped 
= escape_url(url
) 
 947         # Substitute URL if any change after escaping 
 948         if url 
!= url_escaped
: 
 949             req 
= update_Request(req
, url
=url_escaped
) 
 951         for h
, v 
in std_headers
.items(): 
 952             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 953             # The dict keys are capitalized because of this bug by urllib 
 954             if h
.capitalize() not in req
.headers
: 
 957         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 959         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 960             # Python 2.6 is brain-dead when it comes to fragments 
 961             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 962             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 966     def http_response(self
, req
, resp
): 
 969         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 970             content 
= resp
.read() 
 971             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 973                 uncompressed 
= io
.BytesIO(gz
.read()) 
 974             except IOError as original_ioerror
: 
 975                 # There may be junk add the end of the file 
 976                 # See http://stackoverflow.com/q/4928560/35070 for details 
 977                 for i 
in range(1, 1024): 
 979                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 980                         uncompressed 
= io
.BytesIO(gz
.read()) 
 985                     raise original_ioerror
 
 986             resp 
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 987             resp
.msg 
= old_resp
.msg
 
 988             del resp
.headers
['Content-encoding'] 
 990         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 991             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
 992             resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 993             resp
.msg 
= old_resp
.msg
 
 994             del resp
.headers
['Content-encoding'] 
 995         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
 996         # https://github.com/rg3/youtube-dl/issues/6457). 
 997         if 300 <= resp
.code 
< 400: 
 998             location 
= resp
.headers
.get('Location') 
1000                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
1001                 if sys
.version_info 
>= (3, 0): 
1002                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
1004                     location 
= location
.decode('utf-8') 
1005                 location_escaped 
= escape_url(location
) 
1006                 if location 
!= location_escaped
: 
1007                     del resp
.headers
['Location'] 
1008                     if sys
.version_info 
< (3, 0): 
1009                         location_escaped 
= location_escaped
.encode('utf-8') 
1010                     resp
.headers
['Location'] = location_escaped
 
1013     https_request 
= http_request
 
1014     https_response 
= http_response
 
1017 def make_socks_conn_class(base_class
, socks_proxy
): 
1018     assert issubclass(base_class
, ( 
1019         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
1021     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
1022     if url_components
.scheme
.lower() == 'socks5': 
1023         socks_type 
= ProxyType
.SOCKS5
 
1024     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
1025         socks_type 
= ProxyType
.SOCKS4
 
1026     elif url_components
.scheme
.lower() == 'socks4a': 
1027         socks_type 
= ProxyType
.SOCKS4A
 
1029     def unquote_if_non_empty(s
): 
1032         return compat_urllib_parse_unquote_plus(s
) 
1036         url_components
.hostname
, url_components
.port 
or 1080, 
1038         unquote_if_non_empty(url_components
.username
), 
1039         unquote_if_non_empty(url_components
.password
), 
1042     class SocksConnection(base_class
): 
1044             self
.sock 
= sockssocket() 
1045             self
.sock
.setproxy(*proxy_args
) 
1046             if type(self
.timeout
) in (int, float): 
1047                 self
.sock
.settimeout(self
.timeout
) 
1048             self
.sock
.connect((self
.host
, self
.port
)) 
1050             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1051                 if hasattr(self
, '_context'):  # Python > 2.6 
1052                     self
.sock 
= self
._context
.wrap_socket( 
1053                         self
.sock
, server_hostname
=self
.host
) 
1055                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1057     return SocksConnection
 
1060 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
1061     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
1062         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
1063         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
1064         self
._params 
= params
 
1066     def https_open(self
, req
): 
1068         conn_class 
= self
._https
_conn
_class
 
1070         if hasattr(self
, '_context'):  # python > 2.6 
1071             kwargs
['context'] = self
._context
 
1072         if hasattr(self
, '_check_hostname'):  # python 3.x 
1073             kwargs
['check_hostname'] = self
._check
_hostname
 
1075         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
1077             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1078             del req
.headers
['Ytdl-socks-proxy'] 
1080         return self
.do_open(functools
.partial( 
1081             _create_http_connection
, self
, conn_class
, True), 
1085 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
1086     def __init__(self
, cookiejar
=None): 
1087         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
1089     def http_response(self
, request
, response
): 
1090         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
1091         # characters in Set-Cookie HTTP header of last response (see 
1092         # https://github.com/rg3/youtube-dl/issues/6769). 
1093         # In order to at least prevent crashing we will percent encode Set-Cookie 
1094         # header before HTTPCookieProcessor starts processing it. 
1095         # if sys.version_info < (3, 0) and response.headers: 
1096         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
1097         #         set_cookie = response.headers.get(set_cookie_header) 
1099         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
1100         #             if set_cookie != set_cookie_escaped: 
1101         #                 del response.headers[set_cookie_header] 
1102         #                 response.headers[set_cookie_header] = set_cookie_escaped 
1103         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
1105     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
1106     https_response 
= http_response
 
1109 def extract_timezone(date_str
): 
1111         r
'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', 
1114         timezone 
= datetime
.timedelta() 
1116         date_str 
= date_str
[:-len(m
.group('tz'))] 
1117         if not m
.group('sign'): 
1118             timezone 
= datetime
.timedelta() 
1120             sign 
= 1 if m
.group('sign') == '+' else -1 
1121             timezone 
= datetime
.timedelta( 
1122                 hours
=sign 
* int(m
.group('hours')), 
1123                 minutes
=sign 
* int(m
.group('minutes'))) 
1124     return timezone
, date_str
 
1127 def parse_iso8601(date_str
, delimiter
='T', timezone
=None): 
1128     """ Return a UNIX timestamp from the given date """ 
1130     if date_str 
is None: 
1133     date_str 
= re
.sub(r
'\.[0-9]+', '', date_str
) 
1135     if timezone 
is None: 
1136         timezone
, date_str 
= extract_timezone(date_str
) 
1139         date_format 
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
) 
1140         dt 
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
 
1141         return calendar
.timegm(dt
.timetuple()) 
1146 def date_formats(day_first
=True): 
1147     return DATE_FORMATS_DAY_FIRST 
if day_first 
else DATE_FORMATS_MONTH_FIRST
 
1150 def unified_strdate(date_str
, day_first
=True): 
1151     """Return a string with the date in the format YYYYMMDD""" 
1153     if date_str 
is None: 
1157     date_str 
= date_str
.replace(',', ' ') 
1158     # Remove AM/PM + timezone 
1159     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1160     _
, date_str 
= extract_timezone(date_str
) 
1162     for expression 
in date_formats(day_first
): 
1164             upload_date 
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d') 
1167     if upload_date 
is None: 
1168         timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1171                 upload_date 
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d') 
1174     if upload_date 
is not None: 
1175         return compat_str(upload_date
) 
1178 def unified_timestamp(date_str
, day_first
=True): 
1179     if date_str 
is None: 
1182     date_str 
= date_str
.replace(',', ' ') 
1184     pm_delta 
= 12 if re
.search(r
'(?i)PM', date_str
) else 0 
1185     timezone
, date_str 
= extract_timezone(date_str
) 
1187     # Remove AM/PM + timezone 
1188     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1190     for expression 
in date_formats(day_first
): 
1192             dt 
= datetime
.datetime
.strptime(date_str
, expression
) - timezone 
+ datetime
.timedelta(hours
=pm_delta
) 
1193             return calendar
.timegm(dt
.timetuple()) 
1196     timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1198         return calendar
.timegm(timetuple
) + pm_delta 
* 3600 
1201 def determine_ext(url
, default_ext
='unknown_video'): 
1204     guess 
= url
.partition('?')[0].rpartition('.')[2] 
1205     if re
.match(r
'^[A-Za-z0-9]+$', guess
): 
1207     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download 
1208     elif guess
.rstrip('/') in KNOWN_EXTENSIONS
: 
1209         return guess
.rstrip('/') 
1214 def subtitles_filename(filename
, sub_lang
, sub_format
): 
1215     return filename
.rsplit('.', 1)[0] + '.' + sub_lang 
+ '.' + sub_format
 
1218 def date_from_str(date_str
): 
1220     Return a datetime object from a string in the format YYYYMMDD or 
1221     (now|today)[+-][0-9](day|week|month|year)(s)?""" 
1222     today 
= datetime
.date
.today() 
1223     if date_str 
in ('now', 'today'): 
1225     if date_str 
== 'yesterday': 
1226         return today 
- datetime
.timedelta(days
=1) 
1227     match 
= re
.match(r
'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
) 
1228     if match 
is not None: 
1229         sign 
= match
.group('sign') 
1230         time 
= int(match
.group('time')) 
1233         unit 
= match
.group('unit') 
1234         # A bad approximation? 
1238         elif unit 
== 'year': 
1242         delta 
= datetime
.timedelta(**{unit
: time
}) 
1243         return today 
+ delta
 
1244     return datetime
.datetime
.strptime(date_str
, '%Y%m%d').date() 
1247 def hyphenate_date(date_str
): 
1249     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" 
1250     match 
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
) 
1251     if match 
is not None: 
1252         return '-'.join(match
.groups()) 
1257 class DateRange(object): 
1258     """Represents a time interval between two dates""" 
1260     def __init__(self
, start
=None, end
=None): 
1261         """start and end must be strings in the format accepted by date""" 
1262         if start 
is not None: 
1263             self
.start 
= date_from_str(start
) 
1265             self
.start 
= datetime
.datetime
.min.date() 
1267             self
.end 
= date_from_str(end
) 
1269             self
.end 
= datetime
.datetime
.max.date() 
1270         if self
.start 
> self
.end
: 
1271             raise ValueError('Date range: "%s" , the start date must be before the end date' % self
) 
1275         """Returns a range that only contains the given day""" 
1276         return cls(day
, day
) 
1278     def __contains__(self
, date
): 
1279         """Check if the date is in the range""" 
1280         if not isinstance(date
, datetime
.date
): 
1281             date 
= date_from_str(date
) 
1282         return self
.start 
<= date 
<= self
.end
 
1285         return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat()) 
1288 def platform_name(): 
1289     """ Returns the platform name as a compat_str """ 
1290     res 
= platform
.platform() 
1291     if isinstance(res
, bytes): 
1292         res 
= res
.decode(preferredencoding()) 
1294     assert isinstance(res
, compat_str
) 
1298 def _windows_write_string(s
, out
): 
1299     """ Returns True if the string was written using special methods, 
1300     False if it has yet to be written out.""" 
1301     # Adapted from http://stackoverflow.com/a/3259271/35070 
1304     import ctypes
.wintypes
 
1312         fileno 
= out
.fileno() 
1313     except AttributeError: 
1314         # If the output stream doesn't have a fileno, it's virtual 
1316     except io
.UnsupportedOperation
: 
1317         # Some strange Windows pseudo files? 
1319     if fileno 
not in WIN_OUTPUT_IDS
: 
1322     GetStdHandle 
= ctypes
.WINFUNCTYPE( 
1323         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1324         (b
'GetStdHandle', ctypes
.windll
.kernel32
)) 
1325     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1327     WriteConsoleW 
= ctypes
.WINFUNCTYPE( 
1328         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1329         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1330         ctypes
.wintypes
.LPVOID
)((b
'WriteConsoleW', ctypes
.windll
.kernel32
)) 
1331     written 
= ctypes
.wintypes
.DWORD(0) 
1333     GetFileType 
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
'GetFileType', ctypes
.windll
.kernel32
)) 
1334     FILE_TYPE_CHAR 
= 0x0002 
1335     FILE_TYPE_REMOTE 
= 0x8000 
1336     GetConsoleMode 
= ctypes
.WINFUNCTYPE( 
1337         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1338         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1339         (b
'GetConsoleMode', ctypes
.windll
.kernel32
)) 
1340     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1342     def not_a_console(handle
): 
1343         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1345         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1346                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1348     if not_a_console(h
): 
1351     def next_nonbmp_pos(s
): 
1353             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1354         except StopIteration: 
1358         count 
= min(next_nonbmp_pos(s
), 1024) 
1360         ret 
= WriteConsoleW( 
1361             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1363             raise OSError('Failed to write string') 
1364         if not count
:  # We just wrote a non-BMP character 
1365             assert written
.value 
== 2 
1368             assert written
.value 
> 0 
1369             s 
= s
[written
.value
:] 
1373 def write_string(s
, out
=None, encoding
=None): 
1376     assert type(s
) == compat_str
 
1378     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
1379         if _windows_write_string(s
, out
): 
1382     if ('b' in getattr(out
, 'mode', '') or 
1383             sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
1384         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
1386     elif hasattr(out
, 'buffer'): 
1387         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
1388         byt 
= s
.encode(enc
, 'ignore') 
1389         out
.buffer.write(byt
) 
1395 def bytes_to_intlist(bs
): 
1398     if isinstance(bs
[0], int):  # Python 3 
1401         return [ord(c
) for c 
in bs
] 
1404 def intlist_to_bytes(xs
): 
1407     return compat_struct_pack('%dB' % len(xs
), *xs
) 
1410 # Cross-platform file locking 
1411 if sys
.platform 
== 'win32': 
1412     import ctypes
.wintypes
 
1415     class OVERLAPPED(ctypes
.Structure
): 
1417             ('Internal', ctypes
.wintypes
.LPVOID
), 
1418             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1419             ('Offset', ctypes
.wintypes
.DWORD
), 
1420             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1421             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1424     kernel32 
= ctypes
.windll
.kernel32
 
1425     LockFileEx 
= kernel32
.LockFileEx
 
1426     LockFileEx
.argtypes 
= [ 
1427         ctypes
.wintypes
.HANDLE
,     # hFile 
1428         ctypes
.wintypes
.DWORD
,      # dwFlags 
1429         ctypes
.wintypes
.DWORD
,      # dwReserved 
1430         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1431         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1432         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1434     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1435     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1436     UnlockFileEx
.argtypes 
= [ 
1437         ctypes
.wintypes
.HANDLE
,     # hFile 
1438         ctypes
.wintypes
.DWORD
,      # dwReserved 
1439         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1440         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1441         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1443     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1444     whole_low 
= 0xffffffff 
1445     whole_high 
= 0x7fffffff 
1447     def _lock_file(f
, exclusive
): 
1448         overlapped 
= OVERLAPPED() 
1449         overlapped
.Offset 
= 0 
1450         overlapped
.OffsetHigh 
= 0 
1451         overlapped
.hEvent 
= 0 
1452         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1453         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1454         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1455                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1456             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1458     def _unlock_file(f
): 
1459         assert f
._lock
_file
_overlapped
_p
 
1460         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1461         if not UnlockFileEx(handle
, 0, 
1462                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1463             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1466     # Some platforms, such as Jython, is missing fcntl 
1470         def _lock_file(f
, exclusive
): 
1471             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1473         def _unlock_file(f
): 
1474             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1476         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1478         def _lock_file(f
, exclusive
): 
1479             raise IOError(UNSUPPORTED_MSG
) 
1481         def _unlock_file(f
): 
1482             raise IOError(UNSUPPORTED_MSG
) 
1485 class locked_file(object): 
1486     def __init__(self
, filename
, mode
, encoding
=None): 
1487         assert mode 
in ['r', 'a', 'w'] 
1488         self
.f 
= io
.open(filename
, mode
, encoding
=encoding
) 
1491     def __enter__(self
): 
1492         exclusive 
= self
.mode 
!= 'r' 
1494             _lock_file(self
.f
, exclusive
) 
1500     def __exit__(self
, etype
, value
, traceback
): 
1502             _unlock_file(self
.f
) 
1509     def write(self
, *args
): 
1510         return self
.f
.write(*args
) 
1512     def read(self
, *args
): 
1513         return self
.f
.read(*args
) 
1516 def get_filesystem_encoding(): 
1517     encoding 
= sys
.getfilesystemencoding() 
1518     return encoding 
if encoding 
is not None else 'utf-8' 
1521 def shell_quote(args
): 
1523     encoding 
= get_filesystem_encoding() 
1525         if isinstance(a
, bytes): 
1526             # We may get a filename encoded with 'encodeFilename' 
1527             a 
= a
.decode(encoding
) 
1528         quoted_args
.append(pipes
.quote(a
)) 
1529     return ' '.join(quoted_args
) 
1532 def smuggle_url(url
, data
): 
1533     """ Pass additional data in a URL for internal use. """ 
1535     url
, idata 
= unsmuggle_url(url
, {}) 
1537     sdata 
= compat_urllib_parse_urlencode( 
1538         {'__youtubedl_smuggle': json
.dumps(data
)}) 
1539     return url 
+ '#' + sdata
 
1542 def unsmuggle_url(smug_url
, default
=None): 
1543     if '#__youtubedl_smuggle' not in smug_url
: 
1544         return smug_url
, default
 
1545     url
, _
, sdata 
= smug_url
.rpartition('#') 
1546     jsond 
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0] 
1547     data 
= json
.loads(jsond
) 
1551 def format_bytes(bytes): 
1554     if type(bytes) is str: 
1555         bytes = float(bytes) 
1559         exponent 
= int(math
.log(bytes, 1024.0)) 
1560     suffix 
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
] 
1561     converted 
= float(bytes) / float(1024 ** exponent
) 
1562     return '%.2f%s' % (converted
, suffix
) 
1565 def lookup_unit_table(unit_table
, s
): 
1566     units_re 
= '|'.join(re
.escape(u
) for u 
in unit_table
) 
1568         r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
, s
) 
1571     num_str 
= m
.group('num').replace(',', '.') 
1572     mult 
= unit_table
[m
.group('unit')] 
1573     return int(float(num_str
) * mult
) 
1576 def parse_filesize(s
): 
1580     # The lower-case forms are of course incorrect and unofficial, 
1581     # but we support those too 
1598         'megabytes': 1000 ** 2, 
1599         'mebibytes': 1024 ** 2, 
1605         'gigabytes': 1000 ** 3, 
1606         'gibibytes': 1024 ** 3, 
1612         'terabytes': 1000 ** 4, 
1613         'tebibytes': 1024 ** 4, 
1619         'petabytes': 1000 ** 5, 
1620         'pebibytes': 1024 ** 5, 
1626         'exabytes': 1000 ** 6, 
1627         'exbibytes': 1024 ** 6, 
1633         'zettabytes': 1000 ** 7, 
1634         'zebibytes': 1024 ** 7, 
1640         'yottabytes': 1000 ** 8, 
1641         'yobibytes': 1024 ** 8, 
1644     return lookup_unit_table(_UNIT_TABLE
, s
) 
1653     if re
.match(r
'^[\d,.]+$', s
): 
1654         return str_to_int(s
) 
1665     return lookup_unit_table(_UNIT_TABLE
, s
) 
1668 def month_by_name(name
, lang
='en'): 
1669     """ Return the number of a month by (locale-independently) English name """ 
1671     month_names 
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en']) 
1674         return month_names
.index(name
) + 1 
1679 def month_by_abbreviation(abbrev
): 
1680     """ Return the number of a month by (locale-independently) English 
1684         return [s
[:3] for s 
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1 
1689 def fix_xml_ampersands(xml_str
): 
1690     """Replace all the '&' by '&' in XML""" 
1692         r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', 
1697 def setproctitle(title
): 
1698     assert isinstance(title
, compat_str
) 
1700     # ctypes in Jython is not complete 
1701     # http://bugs.jython.org/issue2148 
1702     if sys
.platform
.startswith('java'): 
1706         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
1710         # LoadLibrary in Windows Python 2.7.13 only expects 
1711         # a bytestring, but since unicode_literals turns 
1712         # every string into a unicode string, it fails. 
1714     title_bytes 
= title
.encode('utf-8') 
1715     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1716     buf
.value 
= title_bytes
 
1718         libc
.prctl(15, buf
, 0, 0, 0) 
1719     except AttributeError: 
1720         return  # Strange libc, just skip this 
1723 def remove_start(s
, start
): 
1724     return s
[len(start
):] if s 
is not None and s
.startswith(start
) else s
 
1727 def remove_end(s
, end
): 
1728     return s
[:-len(end
)] if s 
is not None and s
.endswith(end
) else s
 
1731 def remove_quotes(s
): 
1732     if s 
is None or len(s
) < 2: 
1734     for quote 
in ('"', "'", ): 
1735         if s
[0] == quote 
and s
[-1] == quote
: 
1740 def url_basename(url
): 
1741     path 
= compat_urlparse
.urlparse(url
).path
 
1742     return path
.strip('/').split('/')[-1] 
1746     return re
.match(r
'https?://[^?#&]+/', url
).group() 
1749 def urljoin(base
, path
): 
1750     if not isinstance(path
, compat_str
) or not path
: 
1752     if re
.match(r
'^(?:https?:)?//', path
): 
1754     if not isinstance(base
, compat_str
) or not re
.match(r
'^(?:https?:)?//', base
): 
1756     return compat_urlparse
.urljoin(base
, path
) 
1759 class HEADRequest(compat_urllib_request
.Request
): 
1760     def get_method(self
): 
1764 class PUTRequest(compat_urllib_request
.Request
): 
1765     def get_method(self
): 
1769 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1): 
1772             v 
= getattr(v
, get_attr
, None) 
1778         return int(v
) * invscale 
// scale
 
1783 def str_or_none(v
, default
=None): 
1784     return default 
if v 
is None else compat_str(v
) 
1787 def str_to_int(int_str
): 
1788     """ A more relaxed version of int_or_none """ 
1791     int_str 
= re
.sub(r
'[,\.\+]', '', int_str
) 
1795 def float_or_none(v
, scale
=1, invscale
=1, default
=None): 
1799         return float(v
) * invscale 
/ scale
 
1804 def strip_or_none(v
): 
1805     return None if v 
is None else v
.strip() 
1808 def parse_duration(s
): 
1809     if not isinstance(s
, compat_basestring
): 
1814     days
, hours
, mins
, secs
, ms 
= [None] * 5 
1815     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
) 
1817         days
, hours
, mins
, secs
, ms 
= m
.groups() 
1822                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
1825                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
1828                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
1831                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
1834             days
, hours
, mins
, secs
, ms 
= m
.groups() 
1836             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
) 
1838                 hours
, mins 
= m
.groups() 
1844         duration 
+= float(secs
) 
1846         duration 
+= float(mins
) * 60 
1848         duration 
+= float(hours
) * 60 * 60 
1850         duration 
+= float(days
) * 24 * 60 * 60 
1852         duration 
+= float(ms
) 
1856 def prepend_extension(filename
, ext
, expected_real_ext
=None): 
1857     name
, real_ext 
= os
.path
.splitext(filename
) 
1859         '{0}.{1}{2}'.format(name
, ext
, real_ext
) 
1860         if not expected_real_ext 
or real_ext
[1:] == expected_real_ext
 
1861         else '{0}.{1}'.format(filename
, ext
)) 
1864 def replace_extension(filename
, ext
, expected_real_ext
=None): 
1865     name
, real_ext 
= os
.path
.splitext(filename
) 
1866     return '{0}.{1}'.format( 
1867         name 
if not expected_real_ext 
or real_ext
[1:] == expected_real_ext 
else filename
, 
1871 def check_executable(exe
, args
=[]): 
1872     """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
1873     args can be a list of arguments for a short output (like -version) """ 
1875         subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate() 
1881 def get_exe_version(exe
, args
=['--version'], 
1882                     version_re
=None, unrecognized
='present'): 
1883     """ Returns the version of the specified executable, 
1884     or False if the executable is not present """ 
1886         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers 
1887         # SIGTTOU if youtube-dl is run in the background. 
1888         # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656 
1889         out
, _ 
= subprocess
.Popen( 
1890             [encodeArgument(exe
)] + args
, 
1891             stdin
=subprocess
.PIPE
, 
1892             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1895     if isinstance(out
, bytes):  # Python 2.x 
1896         out 
= out
.decode('ascii', 'ignore') 
1897     return detect_exe_version(out
, version_re
, unrecognized
) 
1900 def detect_exe_version(output
, version_re
=None, unrecognized
='present'): 
1901     assert isinstance(output
, compat_str
) 
1902     if version_re 
is None: 
1903         version_re 
= r
'version\s+([-0-9._a-zA-Z]+)' 
1904     m 
= re
.search(version_re
, output
) 
1911 class PagedList(object): 
1913         # This is only useful for tests 
1914         return len(self
.getslice()) 


class OnDemandPagedList(PagedList):
    def __init__(self, pagefunc, pagesize, use_cache=False):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
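

# Illustrative sketch (ours): OnDemandPagedList wraps a page-fetching callback into a
# lazily evaluated, sliceable sequence; only the pages needed for the requested slice
# are fetched. The callback below is hypothetical and just fabricates item names.
def _on_demand_paged_list_example():
    def fetch_page(pagenum):  # stand-in for a paginated API call
        return ['item-%d-%d' % (pagenum, i) for i in range(10)]
    pl = OnDemandPagedList(fetch_page, 10)
    return pl.getslice(5, 12)  # items 5..11, taken from pages 0 and 1 only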


class InAdvancePagedList(PagedList):
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res


def uppercase_escape(s):
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)


def lowercase_escape(s):
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
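

# Sketch (ours): the two escape helpers above turn literal \UXXXXXXXX / \uXXXX escape
# sequences embedded in a string into the actual characters.
def _unicode_escape_examples():
    return [
        uppercase_escape('smile \\U0001F600'),  # expected: 'smile ' + the emoji itself
        lowercase_escape('\\u00e9tat'),         # expected: 'état'
    ]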


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")


def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()


def read_batch_urls(batch_fd):
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')


def update_url_query(url, query):
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
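

# Sketch (ours): update_url_query merges extra parameters into an existing query string;
# existing keys are overwritten. The example URL is made up; parameter order may vary.
def _update_url_query_example():
    return update_url_query('http://example.com/path?a=1', {'a': '2', 'b': 'c'})
    # expected (up to ordering): 'http://example.com/path?a=2&b=c'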


def update_Request(req, url=None, data=None, headers={}, query={}):
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req


def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key not in d or d[key] is None or skip_false_values and not d[key]:
                continue
            return d[key]
        return default
    return d.get(key_or_keys, default)
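

# Sketch (ours): dict_get tries several keys in order and, by default, skips falsy
# values such as 0 or '' in addition to missing keys and None.
def _dict_get_example():
    meta = {'duration': 0, 'length': 123, 'title': None}
    return dict_get(meta, ('duration', 'length'))  # expected: 123 (0 is skipped as falsy)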


def try_get(src, getter, expected_type=None):
    try:
        v = getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        pass
    else:
        if expected_type is None or isinstance(v, expected_type):
            return v


def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)


TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}


def parse_age_limit(s):
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    return TV_PARENTAL_GUIDELINES.get(s)


def strip_jsonp(code):
    return re.sub(
        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)


def js_to_json(code):
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            return ''

        if v[0] in ("'", '"'):
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        [a-zA-Z_][.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
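

# Sketch (ours): js_to_json rewrites JavaScript-ish object literals (unquoted keys,
# single quotes, comments, hex integers, trailing commas) into strict JSON; json is
# imported at the top of this module.
def _js_to_json_example():
    return json.loads(js_to_json("{abc: 'def', /* comment */ key: 0x1F, }"))
    # expected: {'abc': 'def', 'key': 31}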


def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q


DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'


def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s


def version_tuple(v):
    return tuple(int(e) for e in re.split(r'[-.]', v))


def is_outdated_version(version, limit, assume_new=True):
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new


def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')


def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(compat_shlex_quote(a) for a in args)


def error_to_compat_str(err):
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str


def mimetype2ext(mt):
    if mt is None:
        return None

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')
    res = res.split(';')[0].strip().lower()

    return {
        'smptett+xml': 'tt',
        'x-mp4-fragmented': 'mp4',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'vnd.ms-sstr+xml': 'ism',
    }.get(res, res)


def parse_codecs(codecs_str):
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    splited_codecs = list(filter(None, map(
        lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in splited_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        if len(splited_codecs) == 2:
            return {
                'vcodec': splited_codecs[0],
                'acodec': splited_codecs[1],
            }
        elif len(splited_codecs) == 1:
            return {
                'vcodec': 'none',
                'acodec': splited_codecs[0],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
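

# Sketch (ours): parse_codecs splits an RFC 6381 codecs string into vcodec/acodec fields.
def _parse_codecs_example():
    return parse_codecs('avc1.42001e, mp4a.40.2')
    # expected: {'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.2'}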


def urlhandle_detect_ext(url_handle):
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))


def encode_data_uri(data, mime_type):
    return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))


def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)


def determine_protocol(info_dict):
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme


def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
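

# Sketch (ours): render_table left-pads every column to the width of its longest cell.
def _render_table_example():
    return render_table(
        ['format', 'extension'],
        [['360p', 'mp4'], ['720p60', 'webm']])
    # expected:
    # format extension
    # 360p   mp4
    # 720p60 webm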


def _match_one(filter_part, dct):
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/rg3/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)


def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False """
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
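

# Sketch (ours): match_str evaluates '&'-separated filter parts against a dict, as used
# for --match-filter. '<?' also accepts missing fields; '!' checks that a field is absent.
def _match_str_example():
    video = {'like_count': 190, 'dislike_count': 10, 'title': 'abc'}
    return match_str('like_count > 100 & dislike_count <? 50 & !is_live', video)
    # expected: True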


def match_filter_func(filter_str):
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func


def parse_dfxp_time_expr(time_expr):
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))


def srt_subtitles_timecode(seconds):
    return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)


def dfxp2srt(dfxp_data):
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
        'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
    })

    class TTMLPElementParser(object):
        out = ''

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                self.out += '\n'

        def end(self, tag):
            pass

        def data(self, data):
            self.out += data

        def close(self):
            return self.out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)


def cli_option(params, command_option, param):
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []


def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    param = params.get(param)
    assert isinstance(param, bool)
    if separator:
        return [command_option + separator + (true_value if param else false_value)]
    return [command_option, true_value if param else false_value]


def cli_valueless_option(params, command_option, param, expected_value=True):
    param = params.get(param)
    return [command_option] if param == expected_value else []


def cli_configuration_args(params, param, default=[]):
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
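

# Sketch (ours): cli_bool_option maps a boolean parameter onto external-downloader style
# command line flags, optionally joining flag and value with a separator.
def _cli_bool_option_examples():
    params = {'nocheckcertificate': True}
    return [
        cli_bool_option(params, '--no-check-certificate', 'nocheckcertificate'),
        # expected: ['--no-check-certificate', 'true']
        cli_bool_option(params, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
        # expected: ['--check-certificate=false']
    ]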


class ISO639Utils(object):
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name


class ISO3166Utils(object):
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
2786         'AF': 'Afghanistan', 
2787         'AX': 'Åland Islands', 
2790         'AS': 'American Samoa', 
2795         'AG': 'Antigua and Barbuda', 
2812         'BO': 'Bolivia, Plurinational State of', 
2813         'BQ': 'Bonaire, Sint Eustatius and Saba', 
2814         'BA': 'Bosnia and Herzegovina', 
2816         'BV': 'Bouvet Island', 
2818         'IO': 'British Indian Ocean Territory', 
2819         'BN': 'Brunei Darussalam', 
2821         'BF': 'Burkina Faso', 
2827         'KY': 'Cayman Islands', 
2828         'CF': 'Central African Republic', 
2832         'CX': 'Christmas Island', 
2833         'CC': 'Cocos (Keeling) Islands', 
2837         'CD': 'Congo, the Democratic Republic of the', 
2838         'CK': 'Cook Islands', 
2840         'CI': 'Côte d\'Ivoire', 
2845         'CZ': 'Czech Republic', 
2849         'DO': 'Dominican Republic', 
2852         'SV': 'El Salvador', 
2853         'GQ': 'Equatorial Guinea', 
2857         'FK': 'Falkland Islands (Malvinas)', 
2858         'FO': 'Faroe Islands', 
2862         'GF': 'French Guiana', 
2863         'PF': 'French Polynesia', 
2864         'TF': 'French Southern Territories', 
2879         'GW': 'Guinea-Bissau', 
2882         'HM': 'Heard Island and McDonald Islands', 
2883         'VA': 'Holy See (Vatican City State)', 
2890         'IR': 'Iran, Islamic Republic of', 
2893         'IM': 'Isle of Man', 
2903         'KP': 'Korea, Democratic People\'s Republic of', 
2904         'KR': 'Korea, Republic of', 
2907         'LA': 'Lao People\'s Democratic Republic', 
2913         'LI': 'Liechtenstein', 
2917         'MK': 'Macedonia, the Former Yugoslav Republic of', 
2924         'MH': 'Marshall Islands', 
2930         'FM': 'Micronesia, Federated States of', 
2931         'MD': 'Moldova, Republic of', 
2942         'NL': 'Netherlands', 
2943         'NC': 'New Caledonia', 
2944         'NZ': 'New Zealand', 
2949         'NF': 'Norfolk Island', 
2950         'MP': 'Northern Mariana Islands', 
2955         'PS': 'Palestine, State of', 
2957         'PG': 'Papua New Guinea', 
2960         'PH': 'Philippines', 
2964         'PR': 'Puerto Rico', 
2968         'RU': 'Russian Federation', 
2970         'BL': 'Saint Barthélemy', 
2971         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
2972         'KN': 'Saint Kitts and Nevis', 
2973         'LC': 'Saint Lucia', 
2974         'MF': 'Saint Martin (French part)', 
2975         'PM': 'Saint Pierre and Miquelon', 
2976         'VC': 'Saint Vincent and the Grenadines', 
2979         'ST': 'Sao Tome and Principe', 
2980         'SA': 'Saudi Arabia', 
2984         'SL': 'Sierra Leone', 
2986         'SX': 'Sint Maarten (Dutch part)', 
2989         'SB': 'Solomon Islands', 
2991         'ZA': 'South Africa', 
2992         'GS': 'South Georgia and the South Sandwich Islands', 
2993         'SS': 'South Sudan', 
2998         'SJ': 'Svalbard and Jan Mayen', 
3001         'CH': 'Switzerland', 
3002         'SY': 'Syrian Arab Republic', 
3003         'TW': 'Taiwan, Province of China', 
3005         'TZ': 'Tanzania, United Republic of', 
3007         'TL': 'Timor-Leste', 
3011         'TT': 'Trinidad and Tobago', 
3014         'TM': 'Turkmenistan', 
3015         'TC': 'Turks and Caicos Islands', 
3019         'AE': 'United Arab Emirates', 
3020         'GB': 'United Kingdom', 
3021         'US': 'United States', 
3022         'UM': 'United States Minor Outlying Islands', 
3026         'VE': 'Venezuela, Bolivarian Republic of', 
3028         'VG': 'Virgin Islands, British', 
3029         'VI': 'Virgin Islands, U.S.', 
3030         'WF': 'Wallis and Futuna', 
3031         'EH': 'Western Sahara', 
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name"""
        return cls._country_map.get(code.upper())


class GeoUtils(object):
    # Major IPv4 address blocks per country
    _country_ip_map = {
3046         'AD': '85.94.160.0/19', 
3047         'AE': '94.200.0.0/13', 
3048         'AF': '149.54.0.0/17', 
3049         'AG': '209.59.64.0/18', 
3050         'AI': '204.14.248.0/21', 
3051         'AL': '46.99.0.0/16', 
3052         'AM': '46.70.0.0/15', 
3053         'AO': '105.168.0.0/13', 
3054         'AP': '159.117.192.0/21', 
3055         'AR': '181.0.0.0/12', 
3056         'AS': '202.70.112.0/20', 
3057         'AT': '84.112.0.0/13', 
3058         'AU': '1.128.0.0/11', 
3059         'AW': '181.41.0.0/18', 
3060         'AZ': '5.191.0.0/16', 
3061         'BA': '31.176.128.0/17', 
3062         'BB': '65.48.128.0/17', 
3063         'BD': '114.130.0.0/16', 
3065         'BF': '129.45.128.0/17', 
3066         'BG': '95.42.0.0/15', 
3067         'BH': '37.131.0.0/17', 
3068         'BI': '154.117.192.0/18', 
3069         'BJ': '137.255.0.0/16', 
3070         'BL': '192.131.134.0/24', 
3071         'BM': '196.12.64.0/18', 
3072         'BN': '156.31.0.0/16', 
3073         'BO': '161.56.0.0/16', 
3074         'BQ': '161.0.80.0/20', 
3075         'BR': '152.240.0.0/12', 
3076         'BS': '24.51.64.0/18', 
3077         'BT': '119.2.96.0/19', 
3078         'BW': '168.167.0.0/16', 
3079         'BY': '178.120.0.0/13', 
3080         'BZ': '179.42.192.0/18', 
3081         'CA': '99.224.0.0/11', 
3082         'CD': '41.243.0.0/16', 
3083         'CF': '196.32.200.0/21', 
3084         'CG': '197.214.128.0/17', 
3085         'CH': '85.0.0.0/13', 
3086         'CI': '154.232.0.0/14', 
3087         'CK': '202.65.32.0/19', 
3088         'CL': '152.172.0.0/14', 
3089         'CM': '165.210.0.0/15', 
3090         'CN': '36.128.0.0/10', 
3091         'CO': '181.240.0.0/12', 
3092         'CR': '201.192.0.0/12', 
3093         'CU': '152.206.0.0/15', 
3094         'CV': '165.90.96.0/19', 
3095         'CW': '190.88.128.0/17', 
3096         'CY': '46.198.0.0/15', 
3097         'CZ': '88.100.0.0/14', 
3099         'DJ': '197.241.0.0/17', 
3100         'DK': '87.48.0.0/12', 
3101         'DM': '192.243.48.0/20', 
3102         'DO': '152.166.0.0/15', 
3103         'DZ': '41.96.0.0/12', 
3104         'EC': '186.68.0.0/15', 
3105         'EE': '90.190.0.0/15', 
3106         'EG': '156.160.0.0/11', 
3107         'ER': '196.200.96.0/20', 
3108         'ES': '88.0.0.0/11', 
3109         'ET': '196.188.0.0/14', 
3110         'EU': '2.16.0.0/13', 
3111         'FI': '91.152.0.0/13', 
3112         'FJ': '144.120.0.0/16', 
3113         'FM': '119.252.112.0/20', 
3114         'FO': '88.85.32.0/19', 
3116         'GA': '41.158.0.0/15', 
3118         'GD': '74.122.88.0/21', 
3119         'GE': '31.146.0.0/16', 
3120         'GF': '161.22.64.0/18', 
3121         'GG': '62.68.160.0/19', 
3122         'GH': '45.208.0.0/14', 
3123         'GI': '85.115.128.0/19', 
3124         'GL': '88.83.0.0/19', 
3125         'GM': '160.182.0.0/15', 
3126         'GN': '197.149.192.0/18', 
3127         'GP': '104.250.0.0/19', 
3128         'GQ': '105.235.224.0/20', 
3129         'GR': '94.64.0.0/13', 
3130         'GT': '168.234.0.0/16', 
3131         'GU': '168.123.0.0/16', 
3132         'GW': '197.214.80.0/20', 
3133         'GY': '181.41.64.0/18', 
3134         'HK': '113.252.0.0/14', 
3135         'HN': '181.210.0.0/16', 
3136         'HR': '93.136.0.0/13', 
3137         'HT': '148.102.128.0/17', 
3138         'HU': '84.0.0.0/14', 
3139         'ID': '39.192.0.0/10', 
3140         'IE': '87.32.0.0/12', 
3141         'IL': '79.176.0.0/13', 
3142         'IM': '5.62.80.0/20', 
3143         'IN': '117.192.0.0/10', 
3144         'IO': '203.83.48.0/21', 
3145         'IQ': '37.236.0.0/14', 
3146         'IR': '2.176.0.0/12', 
3147         'IS': '82.221.0.0/16', 
3148         'IT': '79.0.0.0/10', 
3149         'JE': '87.244.64.0/18', 
3150         'JM': '72.27.0.0/17', 
3151         'JO': '176.29.0.0/16', 
3152         'JP': '126.0.0.0/8', 
3153         'KE': '105.48.0.0/12', 
3154         'KG': '158.181.128.0/17', 
3155         'KH': '36.37.128.0/17', 
3156         'KI': '103.25.140.0/22', 
3157         'KM': '197.255.224.0/20', 
3158         'KN': '198.32.32.0/19', 
3159         'KP': '175.45.176.0/22', 
3160         'KR': '175.192.0.0/10', 
3161         'KW': '37.36.0.0/14', 
3162         'KY': '64.96.0.0/15', 
3163         'KZ': '2.72.0.0/13', 
3164         'LA': '115.84.64.0/18', 
3165         'LB': '178.135.0.0/16', 
3166         'LC': '192.147.231.0/24', 
3167         'LI': '82.117.0.0/19', 
3168         'LK': '112.134.0.0/15', 
3169         'LR': '41.86.0.0/19', 
3170         'LS': '129.232.0.0/17', 
3171         'LT': '78.56.0.0/13', 
3172         'LU': '188.42.0.0/16', 
3173         'LV': '46.109.0.0/16', 
3174         'LY': '41.252.0.0/14', 
3175         'MA': '105.128.0.0/11', 
3176         'MC': '88.209.64.0/18', 
3177         'MD': '37.246.0.0/16', 
3178         'ME': '178.175.0.0/17', 
3179         'MF': '74.112.232.0/21', 
3180         'MG': '154.126.0.0/17', 
3181         'MH': '117.103.88.0/21', 
3182         'MK': '77.28.0.0/15', 
3183         'ML': '154.118.128.0/18', 
3184         'MM': '37.111.0.0/17', 
3185         'MN': '49.0.128.0/17', 
3186         'MO': '60.246.0.0/16', 
3187         'MP': '202.88.64.0/20', 
3188         'MQ': '109.203.224.0/19', 
3189         'MR': '41.188.64.0/18', 
3190         'MS': '208.90.112.0/22', 
3191         'MT': '46.11.0.0/16', 
3192         'MU': '105.16.0.0/12', 
3193         'MV': '27.114.128.0/18', 
3194         'MW': '105.234.0.0/16', 
3195         'MX': '187.192.0.0/11', 
3196         'MY': '175.136.0.0/13', 
3197         'MZ': '197.218.0.0/15', 
3198         'NA': '41.182.0.0/16', 
3199         'NC': '101.101.0.0/18', 
3200         'NE': '197.214.0.0/18', 
3201         'NF': '203.17.240.0/22', 
3202         'NG': '105.112.0.0/12', 
3203         'NI': '186.76.0.0/15', 
3204         'NL': '145.96.0.0/11', 
3205         'NO': '84.208.0.0/13', 
3206         'NP': '36.252.0.0/15', 
3207         'NR': '203.98.224.0/19', 
3208         'NU': '49.156.48.0/22', 
3209         'NZ': '49.224.0.0/14', 
3210         'OM': '5.36.0.0/15', 
3211         'PA': '186.72.0.0/15', 
3212         'PE': '186.160.0.0/14', 
3213         'PF': '123.50.64.0/18', 
3214         'PG': '124.240.192.0/19', 
3215         'PH': '49.144.0.0/13', 
3216         'PK': '39.32.0.0/11', 
3217         'PL': '83.0.0.0/11', 
3218         'PM': '70.36.0.0/20', 
3219         'PR': '66.50.0.0/16', 
3220         'PS': '188.161.0.0/16', 
3221         'PT': '85.240.0.0/13', 
3222         'PW': '202.124.224.0/20', 
3223         'PY': '181.120.0.0/14', 
3224         'QA': '37.210.0.0/15', 
3225         'RE': '139.26.0.0/16', 
3226         'RO': '79.112.0.0/13', 
3227         'RS': '178.220.0.0/14', 
3228         'RU': '5.136.0.0/13', 
3229         'RW': '105.178.0.0/15', 
3230         'SA': '188.48.0.0/13', 
3231         'SB': '202.1.160.0/19', 
3232         'SC': '154.192.0.0/11', 
3233         'SD': '154.96.0.0/13', 
3234         'SE': '78.64.0.0/12', 
3235         'SG': '152.56.0.0/14', 
3236         'SI': '188.196.0.0/14', 
3237         'SK': '78.98.0.0/15', 
3238         'SL': '197.215.0.0/17', 
3239         'SM': '89.186.32.0/19', 
3240         'SN': '41.82.0.0/15', 
3241         'SO': '197.220.64.0/19', 
3242         'SR': '186.179.128.0/17', 
3243         'SS': '105.235.208.0/21', 
3244         'ST': '197.159.160.0/19', 
3245         'SV': '168.243.0.0/16', 
3246         'SX': '190.102.0.0/20', 
3248         'SZ': '41.84.224.0/19', 
3249         'TC': '65.255.48.0/20', 
3250         'TD': '154.68.128.0/19', 
3251         'TG': '196.168.0.0/14', 
3252         'TH': '171.96.0.0/13', 
3253         'TJ': '85.9.128.0/18', 
3254         'TK': '27.96.24.0/21', 
3255         'TL': '180.189.160.0/20', 
3256         'TM': '95.85.96.0/19', 
3257         'TN': '197.0.0.0/11', 
3258         'TO': '175.176.144.0/21', 
3259         'TR': '78.160.0.0/11', 
3260         'TT': '186.44.0.0/15', 
3261         'TV': '202.2.96.0/19', 
3262         'TW': '120.96.0.0/11', 
3263         'TZ': '156.156.0.0/14', 
3264         'UA': '93.72.0.0/13', 
3265         'UG': '154.224.0.0/13', 
3267         'UY': '167.56.0.0/13', 
3268         'UZ': '82.215.64.0/18', 
3269         'VA': '212.77.0.0/19', 
3270         'VC': '24.92.144.0/20', 
3271         'VE': '186.88.0.0/13', 
3272         'VG': '172.103.64.0/18', 
3273         'VI': '146.226.0.0/16', 
3274         'VN': '14.160.0.0/11', 
3275         'VU': '202.80.32.0/20', 
3276         'WF': '117.20.32.0/21', 
3277         'WS': '202.4.32.0/19', 
3278         'YE': '134.35.0.0/16', 
3279         'YT': '41.242.116.0/22', 
3280         'ZA': '41.0.0.0/11', 
3281         'ZM': '165.56.0.0/13', 
3282         'ZW': '41.85.192.0/19', 
    }

    @classmethod
    def random_ipv4(cls, code):
        block = cls._country_ip_map.get(code.upper())
        if not block:
            return None
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))


class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do the wrapping of the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)


def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    payload = int(binascii.hexlify(data[::-1]), 16)
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted


def encode_base_n(num, n, table=None):
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
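

# Sketch (ours): encode_base_n writes a non-negative integer in base n using the default
# 62-character table unless a custom table is supplied.
def _encode_base_n_examples():
    return [
        encode_base_n(0, 16),    # expected: '0'
        encode_base_n(255, 16),  # expected: 'ff'
        encode_base_n(61, 62),   # expected: 'Z'
    ]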


def decode_packed_codes(code):
    mobj = re.search(PACKED_CODES_RE, code)
    obfucasted_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfucasted_code)


def parse_m3u8_attributes(attrib):
    info = {}
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
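

# Sketch (ours): parse_m3u8_attributes splits an EXT-X attribute list into a dict,
# stripping the surrounding quotes from quoted values.
def _parse_m3u8_attributes_example():
    return parse_m3u8_attributes('BANDWIDTH=539000,CODECS="mp4a.40.2",RESOLUTION=416x234')
    # expected: {'BANDWIDTH': '539000', 'CODECS': 'mp4a.40.2', 'RESOLUTION': '416x234'}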


def urshift(val, n):
    return val >> n if val >= 0 else (val + 0x100000000) >> n


# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]
        chunk_type = header[:4]
        header = header[4:]
        chunk_data = header[:length]
        header = header[length:]
        header = header[4:]  # Skip CRC
        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']
    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]
        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)
                p = a + b - c
                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels


def write_xattr(path, key, value):
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))
            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)
            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")