4 from __future__ 
import unicode_literals
 
  36 import xml
.etree
.ElementTree
 
  40     compat_HTMLParseError
, 
  45     compat_ctypes_WINFUNCTYPE
, 
  46     compat_etree_fromstring
, 
  49     compat_html_entities_html5
, 
  61     compat_urllib_parse_urlencode
, 
  62     compat_urllib_parse_urlparse
, 
  63     compat_urllib_parse_unquote_plus
, 
  64     compat_urllib_request
, 
  75 def register_socks_protocols(): 
  76     # "Register" SOCKS protocols 
  77     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  78     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  79     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  80         if scheme 
not in compat_urlparse
.uses_netloc
: 
  81             compat_urlparse
.uses_netloc
.append(scheme
) 
# Type of a compiled regular expression pattern, for isinstance() checks.
# This is not clearly defined (exposed) otherwise.
compiled_regex_type = type(re.compile(''))
def random_user_agent():
    """Return a desktop-Chrome User-Agent string with a randomly picked
    Chrome version substituted into the template."""
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # NOTE(review): _CHROME_VERSIONS is a sequence of Chrome version strings;
    # its definition is not visible in this view — confirm it is in scope here.
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672     'User-Agent': random_user_agent(), 
1673     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
1674     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
1675     'Accept-Encoding': 'gzip, deflate', 
1676     'Accept-Language': 'en-us,en;q=0.5', 
1681     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', 
# Unique sentinel to distinguish "no default supplied" from an explicit
# default of None (identity-compared with `is`).
NO_DEFAULT = object()
# Full English month names in calendar order (index 0 == January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1692     'en': ENGLISH_MONTH_NAMES
, 
1694         'janvier', 'fƩvrier', 'mars', 'avril', 'mai', 'juin', 
1695         'juillet', 'aoƻt', 'septembre', 'octobre', 'novembre', 'dƩcembre'], 
1698 KNOWN_EXTENSIONS 
= ( 
1699     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
1700     'flv', 'f4v', 'f4a', 'f4b', 
1701     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
1702     'mkv', 'mka', 'mk3d', 
1705     'asf', 'wmv', 'wma', 
1711     'f4f', 'f4m', 'm3u8', 'smil') 
# needed for sanitizing filenames in restricted mode
# Maps each accented Latin character to its ASCII transliteration; the key
# string pairs up 1:1 (via zip) with the chained replacement sequence below
# (single chars for 1:1 replacements, list entries for multi-char ones such
# as AE/OE/TH/ss). The key string had been corrupted by a text-encoding
# round-trip and is restored here.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1739     '%Y/%m/%d %H:%M:%S', 
1741     '%Y-%m-%d %H:%M:%S', 
1742     '%Y-%m-%d %H:%M:%S.%f', 
1745     '%Y-%m-%dT%H:%M:%SZ', 
1746     '%Y-%m-%dT%H:%M:%S.%fZ', 
1747     '%Y-%m-%dT%H:%M:%S.%f0Z', 
1748     '%Y-%m-%dT%H:%M:%S', 
1749     '%Y-%m-%dT%H:%M:%S.%f', 
1751     '%b %d %Y at %H:%M', 
1752     '%b %d %Y at %H:%M:%S', 
1753     '%B %d %Y at %H:%M', 
1754     '%B %d %Y at %H:%M:%S', 
1757 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
1758 DATE_FORMATS_DAY_FIRST
.extend([ 
1764     '%d/%m/%Y %H:%M:%S', 
1767 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
1768 DATE_FORMATS_MONTH_FIRST
.extend([ 
1773     '%m/%d/%Y %H:%M:%S', 
# Matches the tail of P.A.C.K.E.R.-style packed (obfuscated) JavaScript,
# capturing the payload, radix, count and the '|'-separated keyword table.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; \1 backreferences
# the (optional) quote around the type attribute value, and the JSON payload
# is captured in the named group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1780 def preferredencoding(): 
1781     """Get preferred encoding. 
1783     Returns the best encoding scheme for the system, based on 
1784     locale.getpreferredencoding() and some further tweaks. 
1787         pref = locale.getpreferredencoding() 
1795 def write_json_file(obj, fn): 
1796     """ Encode obj as JSON and write it to fn, atomically if possible """ 
1798     fn = encodeFilename(fn) 
1799     if sys.version_info < (3, 0) and sys.platform != 'win32
': 
1800         encoding = get_filesystem_encoding() 
1801         # os.path.basename returns a bytes object, but NamedTemporaryFile 
1802         # will fail if the filename contains non ascii characters unless we 
1803         # use a unicode object 
1804         path_basename = lambda f: os.path.basename(fn).decode(encoding) 
1805         # the same for os.path.dirname 
1806         path_dirname = lambda f: os.path.dirname(fn).decode(encoding) 
1808         path_basename = os.path.basename 
1809         path_dirname = os.path.dirname 
1813         'prefix
': path_basename(fn) + '.', 
1814         'dir': path_dirname(fn), 
1818     # In Python 2.x, json.dump expects a bytestream. 
1819     # In Python 3.x, it writes to a character stream 
1820     if sys.version_info < (3, 0): 
1825             'encoding
': 'utf
-8', 
1828     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) 
1833         if sys.platform == 'win32
': 
1834             # Need to remove existing file on Windows, else os.rename raises 
1835             # WindowsError or FileExistsError. 
1840         os.rename(tf.name, fn) 
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] (or xpath[@key=val]).

        key must look like an attribute name; returns None when nothing
        matches.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
1856     def find_xpath_attr(node, xpath, key, val=None): 
1857         for f in node.findall(compat_xpath(xpath)): 
1858             if key not in f.attrib: 
1860             if val is None or f.attrib.get(key) == val: 
1864 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 
1865 # the namespace parameter 
1868 def xpath_with_ns(path
, ns_map
): 
1869     components 
= [c
.split(':') for c 
in path
.split('/')] 
1871     for c 
in components
: 
1873             replaced
.append(c
[0]) 
1876             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
1877     return '/'.join(replaced
) 
1880 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
1881     def _find_xpath(xpath
): 
1882         return node
.find(compat_xpath(xpath
)) 
1884     if isinstance(xpath
, (str, compat_str
)): 
1885         n 
= _find_xpath(xpath
) 
1893         if default 
is not NO_DEFAULT
: 
1896             name 
= xpath 
if name 
is None else name
 
1897             raise ExtractorError('Could not find XML element %s' % name
) 
1903 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
1904     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
1905     if n 
is None or n 
== default
: 
1908         if default 
is not NO_DEFAULT
: 
1911             name 
= xpath 
if name 
is None else name
 
1912             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
1918 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
1919     n 
= find_xpath_attr(node
, xpath
, key
) 
1921         if default 
is not NO_DEFAULT
: 
1924             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
1925             raise ExtractorError('Could not find XML attribute %s' % name
) 
1928     return n
.attrib
[key
] 
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given id attribute in the
    passed HTML document, or None when absent."""
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the
    passed HTML document, or None when no tag matches."""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute equals value in
    the passed HTML document, or None when no tag matches."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed
    HTML document as a list."""
    # Match the class name as a whole word anywhere inside the (possibly
    # multi-valued) class attribute; the pattern is already a regex, so the
    # attribute lookup must not re-escape it.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_pattern, html, escape_value=False)
1954 def get_elements_by_attribute(attribute, value, html, escape_value=True): 
1955     """Return the content of the tag with the specified attribute in the passed HTML document""" 
1957     value = re.escape(value) if escape_value else value 
1960     for m in re.finditer(r'''(?xs) 
1962          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
 
1964          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*? 
1968     ''' % (re.escape(attribute), value), html): 
1969         res = m.group('content
') 
1971         if res.startswith('"') or res.startswith("'"): 
1974         retlist.append(unescapeHTML(res)) 
1979 class HTMLAttributeParser(compat_HTMLParser): 
1980     """Trivial HTML parser to gather the attributes for a single element""" 
1983         compat_HTMLParser.__init__(self) 
    def handle_starttag(self, tag, attrs):
        # Record the attributes of the element being parsed; attrs is the
        # list of (name, value) pairs HTMLParser passes in.
        self.attrs = dict(attrs)
1989 def extract_attributes(html_element): 
1990     """Given a string for an HTML element such as 
1992          a="foo" B="bar" c="&98;az" d=boz 
1993          empty= noval entity="&" 
1996     Decode and return a dictionary of attributes. 
1998         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
1999         'empty
': '', 'noval
': None, 'entity
': '&', 
2000         'sq
': '"', 'dq': '\'' 
2002     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
2003     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
2005     parser = HTMLAttributeParser() 
2007         parser.feed(html_element) 
2009     # Older Python may throw HTMLParseError in case of malformed HTML 
2010     except compat_HTMLParseError: 
2015 def clean_html(html): 
2016     """Clean an HTML snippet into a readable string""" 
2018     if html is None:  # Convenience for sanitizing descriptions etc. 
2022     html = html.replace('\n', ' ') 
2023     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) 
2024     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
2026     html = re.sub('<.*?>', '', html) 
2027     # Replace html entities 
2028     html = unescapeHTML(html) 
2032 def sanitize_open(filename, open_mode): 
2033     """Try to open the given filename, and slightly tweak it if this fails. 
2035     Attempts to open the given filename. If this fails, it tries to change 
2036     the filename slightly, step by step, until it's either able to open it 
2037     or it fails and raises a final exception, like the standard open() 
2040     It returns the tuple (stream, definitive_file_name). 
2044             if sys.platform == 'win32': 
2046                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
2047             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
2048         stream = open(encodeFilename(filename), open_mode) 
2049         return (stream, filename) 
2050     except (IOError, OSError) as err: 
2051         if err.errno in (errno.EACCES,): 
2054         # In case of error, try to remove win32 forbidden chars 
2055         alt_filename = sanitize_path(filename) 
2056         if alt_filename == filename: 
2059             # An exception here should be caught in the caller 
2060             stream = open(encodeFilename(alt_filename), open_mode) 
2061             return (stream, alt_filename) 
2064 def timeconvert(timestr): 
2065     """Convert RFC 2822 defined time string into system timestamp""" 
2067     timetuple = email.utils.parsedate_tz(timestr) 
2068     if timetuple is not None: 
2069         timestamp = email.utils.mktime_tz(timetuple) 
2073 def sanitize_filename(s, restricted=False, is_id=False): 
2074     """Sanitizes a string so it could be used as part of a filename. 
2075     If restricted is set, use a stricter subset of allowed characters. 
2076     Set is_id if this is not an arbitrary string, but an ID that should be kept 
2079     def replace_insane(char): 
2080         if restricted and char in ACCENT_CHARS: 
2081             return ACCENT_CHARS[char] 
2082         if char == '?' or ord(char) < 32 or ord(char) == 127: 
2085             return '' if restricted else '\'' 
2087             return '_
-' if restricted else ' -' 
2088         elif char in '\\/|
*<>': 
2090         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
2092         if restricted 
and ord(char
) > 127: 
2097     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
2098     result 
= ''.join(map(replace_insane
, s
)) 
2100         while '__' in result
: 
2101             result 
= result
.replace('__', '_') 
2102         result 
= result
.strip('_') 
2103         # Common case of "Foreign band name - English song title" 
2104         if restricted 
and result
.startswith('-_'): 
2106         if result
.startswith('-'): 
2107             result 
= '_' + result
[len('-'):] 
2108         result 
= result
.lstrip('.') 
2114 def sanitize_path(s
): 
2115     """Sanitizes and normalizes path on Windows""" 
2116     if sys
.platform 
!= 'win32': 
2118     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
2119     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
2120         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
2121     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
2125         path_part 
if path_part 
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
) 
2126         for path_part 
in norm_path
] 
2128         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
2129     return os
.path
.join(*sanitized_path
) 
2132 def sanitize_url(url
): 
2133     # Prepend protocol-less URLs with `http:` scheme in order to mitigate 
2134     # the number of unwanted failures due to missing protocol 
2135     if url
.startswith('//'): 
2136         return 'http:%s' % url
 
2137     # Fix some common typos seen so far 
2139         # https://github.com/ytdl-org/youtube-dl/issues/15649 
2140         (r
'^httpss://', r
'https://'), 
2141         # https://bx1.be/lives/direct-tv/ 
2142         (r
'^rmtp([es]?)://', r
'rtmp\1://'), 
2144     for mistake
, fixup 
in COMMON_TYPOS
: 
2145         if re
.match(mistake
, url
): 
2146             return re
.sub(mistake
, fixup
, url
) 
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after passing the URL through sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2155     """Expand shell variables and ~""" 
2156     return os
.path
.expandvars(compat_expanduser(s
)) 
2159 def orderedSet(iterable
): 
2160     """ Remove all duplicates from the input iterable """ 
2168 def _htmlentity_transform(entity_with_semicolon
): 
2169     """Transforms an HTML entity to a character.""" 
2170     entity 
= entity_with_semicolon
[:-1] 
2172     # Known non-numeric HTML entity 
2173     if entity 
in compat_html_entities
.name2codepoint
: 
2174         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
2176     # TODO: HTML5 allows entities without a semicolon. For example, 
2177     # 'Éric' should be decoded as 'Ćric'. 
2178     if entity_with_semicolon 
in compat_html_entities_html5
: 
2179         return compat_html_entities_html5
[entity_with_semicolon
] 
2181     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
2182     if mobj 
is not None: 
2183         numstr 
= mobj
.group(1) 
2184         if numstr
.startswith('x'): 
2186             numstr 
= '0%s' % numstr
 
2189         # See https://github.com/ytdl-org/youtube-dl/issues/7518 
2191             return compat_chr(int(numstr
, base
)) 
2195     # Unknown entity in name, return its literal representation 
2196     return '&%s;' % entity
 
2199 def unescapeHTML(s
): 
2202     assert type(s
) == compat_str
 
2205         r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
2208 def get_subprocess_encoding(): 
2209     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
2210         # For subprocess calls, encode with locale encoding 
2211         # Refer to http://stackoverflow.com/a/9951851/35070 
2212         encoding 
= preferredencoding() 
2214         encoding 
= sys
.getfilesystemencoding() 
2215     if encoding 
is None: 
2220 def encodeFilename(s
, for_subprocess
=False): 
2222     @param s The name of the file 
2225     assert type(s
) == compat_str
 
2227     # Python 3 has a Unicode API 
2228     if sys
.version_info 
>= (3, 0): 
2231     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
2232     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
2233     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
2234     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
2237     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
2238     if sys
.platform
.startswith('java'): 
2241     return s
.encode(get_subprocess_encoding(), 'ignore') 
2244 def decodeFilename(b
, for_subprocess
=False): 
2246     if sys
.version_info 
>= (3, 0): 
2249     if not isinstance(b
, bytes): 
2252     return b
.decode(get_subprocess_encoding(), 'ignore') 
def encodeArgument(s):
    """Encode a subprocess command-line argument via encodeFilename()."""
    arg = s
    if not isinstance(arg, compat_str):
        # Legacy code still hands us byte strings; promote to text first.
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        arg = arg.decode('ascii')
    return encodeFilename(arg, True)
def decodeArgument(b):
    """Decode a subprocess argument via decodeFilename() (for_subprocess mode)."""
    return decodeFilename(b, True)
2268 def decodeOption(optval
): 
2271     if isinstance(optval
, bytes): 
2272         optval 
= optval
.decode(preferredencoding()) 
2274     assert isinstance(optval
, compat_str
) 
2278 def formatSeconds(secs
): 
2280         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
2282         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
2287 def make_HTTPS_handler(params
, **kwargs
): 
2288     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
2289     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
2290         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
2291         if opts_no_check_certificate
: 
2292             context
.check_hostname 
= False 
2293             context
.verify_mode 
= ssl
.CERT_NONE
 
2295             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
2298             # (create_default_context present but HTTPSHandler has no context=) 
2301     if sys
.version_info 
< (3, 2): 
2302         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
2303     else:  # Python < 3.4 
2304         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
2305         context
.verify_mode 
= (ssl
.CERT_NONE
 
2306                                if opts_no_check_certificate
 
2307                                else ssl
.CERT_REQUIRED
) 
2308         context
.set_default_verify_paths() 
2309         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
2312 def bug_reports_message(): 
2313     if ytdl_is_updateable(): 
2314         update_cmd 
= 'type  youtube-dl -U  to update' 
2316         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
2317     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
2318     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
2319     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors; all project-specific exceptions
    below derive from it so callers can catch the whole family at once."""
2328 class ExtractorError(YoutubeDLError
): 
2329     """Error during info extraction.""" 
2331     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
2332         """ tb, if given, is the original traceback (so that it can be printed out). 
2333         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
2336         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
2338         if video_id 
is not None: 
2339             msg 
= video_id 
+ ': ' + msg
 
2341             msg 
+= ' (caused by %r)' % cause
 
2343             msg 
+= bug_reports_message() 
2344         super(ExtractorError
, self
).__init
__(msg
) 
2347         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
2349         self
.video_id 
= video_id
 
2351     def format_traceback(self
): 
2352         if self
.traceback 
is None: 
2354         return ''.join(traceback
.format_tb(self
.traceback
)) 
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL."""

    def __init__(self, url):
        # expected=True: an unsupported URL is a normal condition,
        # not a bug in youtube-dl.
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # expected=True: geo blocks are normal conditions, not bugs.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # countries: presumably the country codes from which the video is
        # reachable, or None when unknown — confirm against callers.
        self.countries = countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
2404 class PostProcessingError(YoutubeDLError
): 
2405     """Post Processing exception. 
2407     This exception may be raised by PostProcessor's .run() method to 
2408     indicate an error in the postprocessing task. 
2411     def __init__(self
, msg
): 
2412         super(PostProcessingError
, self
).__init
__(msg
) 
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
2430 class ContentTooShortError(YoutubeDLError
): 
2431     """Content Too Short exception. 
2433     This exception may be raised by FileDownloader objects when a file they 
2434     download is too small for what the server announced first, indicating 
2435     the connection was probably interrupted. 
2438     def __init__(self
, downloaded
, expected
): 
2439         super(ContentTooShortError
, self
).__init
__( 
2440             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
) 
2443         self
.downloaded 
= downloaded
 
2444         self
.expected 
= expected
 
2447 class XAttrMetadataError(YoutubeDLError
): 
2448     def __init__(self
, code
=None, msg
='Unknown error'): 
2449         super(XAttrMetadataError
, self
).__init
__(msg
) 
2453         # Parsing code and msg 
2454         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) 
2455                 or 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
2456             self
.reason 
= 'NO_SPACE' 
2457         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
2458             self
.reason 
= 'VALUE_TOO_LONG' 
2460             self
.reason 
= 'NOT_SUPPORTED' 
2463 class XAttrUnavailableError(YoutubeDLError
): 
2467 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
2468     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
2469     # expected HTTP responses to meet HTTP/1.0 or later (see also 
2470     # https://github.com/ytdl-org/youtube-dl/issues/6727) 
2471     if sys
.version_info 
< (3, 0): 
2472         kwargs
['strict'] = True 
2473     hc 
= http_class(*args
, **compat_kwargs(kwargs
)) 
2474     source_address 
= ydl_handler
._params
.get('source_address') 
2476     if source_address 
is not None: 
2477         # This is to workaround _create_connection() from socket where it will try all 
2478         # address data from getaddrinfo() including IPv6. This filters the result from 
2479         # getaddrinfo() based on the source_address value. 
2480         # This is based on the cpython socket.create_connection() function. 
2481         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 
2482         def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None): 
2483             host
, port 
= address
 
2485             addrs 
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
) 
2486             af 
= socket
.AF_INET 
if '.' in source_address
[0] else socket
.AF_INET6
 
2487             ip_addrs 
= [addr 
for addr 
in addrs 
if addr
[0] == af
] 
2488             if addrs 
and not ip_addrs
: 
2489                 ip_version 
= 'v4' if af 
== socket
.AF_INET 
else 'v6' 
2491                     "No remote IP%s addresses available for connect, can't use '%s' as source address" 
2492                     % (ip_version
, source_address
[0])) 
2493             for res 
in ip_addrs
: 
2494                 af
, socktype
, proto
, canonname
, sa 
= res
 
2497                     sock 
= socket
.socket(af
, socktype
, proto
) 
2498                     if timeout 
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
: 
2499                         sock
.settimeout(timeout
) 
2500                     sock
.bind(source_address
) 
2502                     err 
= None  # Explicitly break reference cycle 
2504                 except socket
.error 
as _
: 
2506                     if sock 
is not None: 
2511                 raise socket
.error('getaddrinfo returns an empty list') 
2512         if hasattr(hc
, '_create_connection'): 
2513             hc
._create
_connection 
= _create_connection
 
2514         sa 
= (source_address
, 0) 
2515         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
2516             hc
.source_address 
= sa
 
2518             def _hc_connect(self
, *args
, **kwargs
): 
2519                 sock 
= _create_connection( 
2520                     (self
.host
, self
.port
), self
.timeout
, sa
) 
2522                     self
.sock 
= ssl
.wrap_socket( 
2523                         sock
, self
.key_file
, self
.cert_file
, 
2524                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
2527             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, return a copy of the mapping with any
    Accept-Encoding header (matched case-insensitively) removed and the
    marker itself dropped; otherwise return the mapping unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    # Rebuild without Accept-Encoding, then strip the marker itself.
    cleaned = dict((name, value) for name, value in headers.items()
                   if name.lower() != 'accept-encoding')
    del cleaned['Youtubedl-no-compression']
    return cleaned
2542 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
2543     """Handler for HTTP requests and responses. 
2545     This class, when installed with an OpenerDirector, automatically adds 
2546     the standard headers to every HTTP request and handles gzipped and 
2547     deflated responses from web servers. If compression is to be avoided in 
2548     a particular request, the original request in the program code only has 
2549     to include the HTTP header "Youtubedl-no-compression", which will be 
2550     removed before making the real request. 
2552     Part of this code was copied from: 
2554     http://techknack.net/python-urllib2-handlers/ 
2556     Andrew Rowls, the author of that code, agreed to release it to the 
    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict; stored so connection helpers
        # can read options from it (e.g. 'source_address').
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2564     def http_open(self
, req
): 
2565         conn_class 
= compat_http_client
.HTTPConnection
 
2567         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
2569             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
2570             del req
.headers
['Ytdl-socks-proxy'] 
2572         return self
.do_open(functools
.partial( 
2573             _create_http_connection
, self
, conn_class
, False), 
2579             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
2581             return zlib
.decompress(data
) 
2583     def http_request(self
, req
): 
2584         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
2585         # always respected by websites, some tend to give out URLs with non percent-encoded 
2586         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
2587         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
2588         # To work around aforementioned issue we will replace request's original URL with 
2589         # percent-encoded one 
2590         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
2591         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
2592         url 
= req
.get_full_url() 
2593         url_escaped 
= escape_url(url
) 
2595         # Substitute URL if any change after escaping 
2596         if url 
!= url_escaped
: 
2597             req 
= update_Request(req
, url
=url_escaped
) 
2599         for h
, v 
in std_headers
.items(): 
2600             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
2601             # The dict keys are capitalized because of this bug by urllib 
2602             if h
.capitalize() not in req
.headers
: 
2603                 req
.add_header(h
, v
) 
2605         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
2607         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
2608             # Python 2.6 is brain-dead when it comes to fragments 
2609             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
2610             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
2614     def http_response(self
, req
, resp
): 
2617         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
2618             content 
= resp
.read() 
2619             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
2621                 uncompressed 
= io
.BytesIO(gz
.read()) 
2622             except IOError as original_ioerror
: 
2623                 # There may be junk add the end of the file 
2624                 # See http://stackoverflow.com/q/4928560/35070 for details 
2625                 for i 
in range(1, 1024): 
2627                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
2628                         uncompressed 
= io
.BytesIO(gz
.read()) 
2633                     raise original_ioerror
 
2634             resp 
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
2635             resp
.msg 
= old_resp
.msg
 
2636             del resp
.headers
['Content-encoding'] 
2638         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
2639             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
2640             resp 
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
2641             resp
.msg 
= old_resp
.msg
 
2642             del resp
.headers
['Content-encoding'] 
2643         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
2644         # https://github.com/ytdl-org/youtube-dl/issues/6457). 
2645         if 300 <= resp
.code 
< 400: 
2646             location 
= resp
.headers
.get('Location') 
2648                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
2649                 if sys
.version_info 
>= (3, 0): 
2650                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
2652                     location 
= location
.decode('utf-8') 
2653                 location_escaped 
= escape_url(location
) 
2654                 if location 
!= location_escaped
: 
2655                     del resp
.headers
['Location'] 
2656                     if sys
.version_info 
< (3, 0): 
2657                         location_escaped 
= location_escaped
.encode('utf-8') 
2658                     resp
.headers
['Location'] = location_escaped
 
2661     https_request 
= http_request
 
2662     https_response 
= http_response
 
2665 def make_socks_conn_class(base_class
, socks_proxy
): 
2666     assert issubclass(base_class
, ( 
2667         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
2669     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
2670     if url_components
.scheme
.lower() == 'socks5': 
2671         socks_type 
= ProxyType
.SOCKS5
 
2672     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
2673         socks_type 
= ProxyType
.SOCKS4
 
2674     elif url_components
.scheme
.lower() == 'socks4a': 
2675         socks_type 
= ProxyType
.SOCKS4A
 
2677     def unquote_if_non_empty(s
): 
2680         return compat_urllib_parse_unquote_plus(s
) 
2684         url_components
.hostname
, url_components
.port 
or 1080, 
2686         unquote_if_non_empty(url_components
.username
), 
2687         unquote_if_non_empty(url_components
.password
), 
2690     class SocksConnection(base_class
): 
2692             self
.sock 
= sockssocket() 
2693             self
.sock
.setproxy(*proxy_args
) 
2694             if type(self
.timeout
) in (int, float): 
2695                 self
.sock
.settimeout(self
.timeout
) 
2696             self
.sock
.connect((self
.host
, self
.port
)) 
2698             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
2699                 if hasattr(self
, '_context'):  # Python > 2.6 
2700                     self
.sock 
= self
._context
.wrap_socket( 
2701                         self
.sock
, server_hostname
=self
.host
) 
2703                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
2705     return SocksConnection
 
2708 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
2709     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
2710         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
2711         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
2712         self
._params 
= params
 
2714     def https_open(self
, req
): 
2716         conn_class 
= self
._https
_conn
_class
 
2718         if hasattr(self
, '_context'):  # python > 2.6 
2719             kwargs
['context'] = self
._context
 
2720         if hasattr(self
, '_check_hostname'):  # python 3.x 
2721             kwargs
['check_hostname'] = self
._check
_hostname
 
2723         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
2725             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
2726             del req
.headers
['Ytdl-socks-proxy'] 
2728         return self
.do_open(functools
.partial( 
2729             _create_http_connection
, self
, conn_class
, True), 
2733 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
): 
2735     See [1] for cookie file format. 
2737     1. https://curl.haxx.se/docs/http-cookies.html 
2739     _HTTPONLY_PREFIX 
= '#HttpOnly_' 
2741     _HEADER 
= '''# Netscape HTTP Cookie File 
2742 # This file is generated by youtube-dl.  Do not edit. 
2745     _CookieFileEntry 
= collections
.namedtuple( 
2747         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) 
2749     def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False): 
2751         Save cookies to a file. 
2753         Most of the code is taken from CPython 3.8 and slightly adapted 
2754         to support cookie files with UTF-8 in both python 2 and 3. 
2756         if filename 
is None: 
2757             if self
.filename 
is not None: 
2758                 filename 
= self
.filename
 
2760                 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
) 
2762         # Store session cookies with `expires` set to 0 instead of an empty 
2765             if cookie
.expires 
is None: 
2768         with io
.open(filename
, 'w', encoding
='utf-8') as f
: 
2769             f
.write(self
._HEADER
) 
2772                 if not ignore_discard 
and cookie
.discard
: 
2774                 if not ignore_expires 
and cookie
.is_expired(now
): 
2780                 if cookie
.domain
.startswith('.'): 
2781                     initial_dot 
= 'TRUE' 
2783                     initial_dot 
= 'FALSE' 
2784                 if cookie
.expires 
is not None: 
2785                     expires 
= compat_str(cookie
.expires
) 
2788                 if cookie
.value 
is None: 
2789                     # cookies.txt regards 'Set-Cookie: foo' as a cookie 
2790                     # with no name, whereas http.cookiejar regards it as a 
2791                     # cookie with no value. 
2796                     value 
= cookie
.value
 
2798                     '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
, 
2799                                secure
, expires
, name
, value
]) + '\n') 
2801     def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False): 
2802         """Load cookies from a file.""" 
2803         if filename 
is None: 
2804             if self
.filename 
is not None: 
2805                 filename 
= self
.filename
 
2807                 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
) 
2809         def prepare_line(line
): 
2810             if line
.startswith(self
._HTTPONLY
_PREFIX
): 
2811                 line 
= line
[len(self
._HTTPONLY
_PREFIX
):] 
2812             # comments and empty lines are fine 
2813             if line
.startswith('#') or not line
.strip(): 
2815             cookie_list 
= line
.split('\t') 
2816             if len(cookie_list
) != self
._ENTRY
_LEN
: 
2817                 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
)) 
2818             cookie 
= self
._CookieFileEntry
(*cookie_list
) 
2819             if cookie
.expires_at 
and not cookie
.expires_at
.isdigit(): 
2820                 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
) 
2824         with io
.open(filename
, encoding
='utf-8') as f
: 
2827                     cf
.write(prepare_line(line
)) 
2828                 except compat_cookiejar
.LoadError 
as e
: 
2830                         'WARNING: skipping cookie file entry due to %s: %r\n' 
2831                         % (e
, line
), sys
.stderr
) 
2834         self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
) 
2835         # Session cookies are denoted by either `expires` field set to 
2836         # an empty string or 0. MozillaCookieJar only recognizes the former 
2837         # (see [1]). So we need force the latter to be recognized as session 
2838         # cookies on our own. 
2839         # Session cookies may be important for cookies-based authentication, 
2840         # e.g. usually, when user does not check 'Remember me' check box while 
2841         # logging in on a site, some important cookies are stored as session 
2842         # cookies so that not recognizing them will result in failed login. 
2843         # 1. https://bugs.python.org/issue17164 
2845             # Treat `expires=0` cookies as session cookies 
2846             if cookie
.expires 
== 0: 
2847                 cookie
.expires 
= None 
2848                 cookie
.discard 
= True 
2851 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
2852     def __init__(self
, cookiejar
=None): 
2853         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
2855     def http_response(self
, request
, response
): 
2856         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
2857         # characters in Set-Cookie HTTP header of last response (see 
2858         # https://github.com/ytdl-org/youtube-dl/issues/6769). 
2859         # In order to at least prevent crashing we will percent encode Set-Cookie 
2860         # header before HTTPCookieProcessor starts processing it. 
2861         # if sys.version_info < (3, 0) and response.headers: 
2862         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
2863         #         set_cookie = response.headers.get(set_cookie_header) 
2865         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
2866         #             if set_cookie != set_cookie_escaped: 
2867         #                 del response.headers[set_cookie_header] 
2868         #                 response.headers[set_cookie_header] = set_cookie_escaped 
2869         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
2871     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
2872     https_response 
= http_response
 
2875 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
): 
2876     if sys
.version_info
[0] < 3: 
2877         def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
): 
2878             # On python 2 urlh.geturl() may sometimes return redirect URL 
2879             # as byte string instead of unicode. This workaround allows 
2880             # to force it always return unicode. 
2881             return compat_urllib_request
.HTTPRedirectHandler
.redirect_request(self
, req
, fp
, code
, msg
, headers
, compat_str(newurl
)) 
2884 def extract_timezone(date_str
): 
2886         r
'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', 
2889         timezone 
= datetime
.timedelta() 
2891         date_str 
= date_str
[:-len(m
.group('tz'))] 
2892         if not m
.group('sign'): 
2893             timezone 
= datetime
.timedelta() 
2895             sign 
= 1 if m
.group('sign') == '+' else -1 
2896             timezone 
= datetime
.timedelta( 
2897                 hours
=sign 
* int(m
.group('hours')), 
2898                 minutes
=sign 
* int(m
.group('minutes'))) 
2899     return timezone
, date_str
 
2902 def parse_iso8601(date_str
, delimiter
='T', timezone
=None): 
2903     """ Return a UNIX timestamp from the given date """ 
2905     if date_str 
is None: 
2908     date_str 
= re
.sub(r
'\.[0-9]+', '', date_str
) 
2910     if timezone 
is None: 
2911         timezone
, date_str 
= extract_timezone(date_str
) 
2914         date_format 
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
) 
2915         dt 
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
 
2916         return calendar
.timegm(dt
.timetuple()) 
2921 def date_formats(day_first
=True): 
2922     return DATE_FORMATS_DAY_FIRST 
if day_first 
else DATE_FORMATS_MONTH_FIRST
 
2925 def unified_strdate(date_str
, day_first
=True): 
2926     """Return a string with the date in the format YYYYMMDD""" 
2928     if date_str 
is None: 
2932     date_str 
= date_str
.replace(',', ' ') 
2933     # Remove AM/PM + timezone 
2934     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
2935     _
, date_str 
= extract_timezone(date_str
) 
2937     for expression 
in date_formats(day_first
): 
2939             upload_date 
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d') 
2942     if upload_date 
is None: 
2943         timetuple 
= email
.utils
.parsedate_tz(date_str
) 
2946                 upload_date 
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d') 
2949     if upload_date 
is not None: 
2950         return compat_str(upload_date
) 
2953 def unified_timestamp(date_str
, day_first
=True): 
2954     if date_str 
is None: 
2957     date_str 
= re
.sub(r
'[,|]', '', date_str
) 
2959     pm_delta 
= 12 if re
.search(r
'(?i)PM', date_str
) else 0 
2960     timezone
, date_str 
= extract_timezone(date_str
) 
2962     # Remove AM/PM + timezone 
2963     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
2965     # Remove unrecognized timezones from ISO 8601 alike timestamps 
2966     m 
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
) 
2968         date_str 
= date_str
[:-len(m
.group('tz'))] 
2970     # Python only supports microseconds, so remove nanoseconds 
2971     m 
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
) 
2973         date_str 
= m
.group(1) 
2975     for expression 
in date_formats(day_first
): 
2977             dt 
= datetime
.datetime
.strptime(date_str
, expression
) - timezone 
+ datetime
.timedelta(hours
=pm_delta
) 
2978             return calendar
.timegm(dt
.timetuple()) 
2981     timetuple 
= email
.utils
.parsedate_tz(date_str
) 
2983         return calendar
.timegm(timetuple
) + pm_delta 
* 3600 
2986 def determine_ext(url
, default_ext
='unknown_video'): 
2987     if url 
is None or '.' not in url
: 
2989     guess 
= url
.partition('?')[0].rpartition('.')[2] 
2990     if re
.match(r
'^[A-Za-z0-9]+$', guess
): 
2992     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download 
2993     elif guess
.rstrip('/') in KNOWN_EXTENSIONS
: 
2994         return guess
.rstrip('/') 
2999 def subtitles_filename(filename
, sub_lang
, sub_format
, expected_real_ext
=None): 
3000     return replace_extension(filename
, sub_lang 
+ '.' + sub_format
, expected_real_ext
) 
3003 def date_from_str(date_str
): 
3005     Return a datetime object from a string in the format YYYYMMDD or 
3006     (now|today)[+-][0-9](day|week|month|year)(s)?""" 
3007     today 
= datetime
.date
.today() 
3008     if date_str 
in ('now', 'today'): 
3010     if date_str 
== 'yesterday': 
3011         return today 
- datetime
.timedelta(days
=1) 
3012     match 
= re
.match(r
'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
) 
3013     if match 
is not None: 
3014         sign 
= match
.group('sign') 
3015         time 
= int(match
.group('time')) 
3018         unit 
= match
.group('unit') 
3019         # A bad approximation? 
3023         elif unit 
== 'year': 
3027         delta 
= datetime
.timedelta(**{unit
: time
}) 
3028         return today 
+ delta
 
3029     return datetime
.datetime
.strptime(date_str
, '%Y%m%d').date() 
3032 def hyphenate_date(date_str
): 
3034     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" 
3035     match 
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
) 
3036     if match 
is not None: 
3037         return '-'.join(match
.groups()) 
3042 class DateRange(object): 
3043     """Represents a time interval between two dates""" 
3045     def __init__(self
, start
=None, end
=None): 
3046         """start and end must be strings in the format accepted by date""" 
3047         if start 
is not None: 
3048             self
.start 
= date_from_str(start
) 
3050             self
.start 
= datetime
.datetime
.min.date() 
3052             self
.end 
= date_from_str(end
) 
3054             self
.end 
= datetime
.datetime
.max.date() 
3055         if self
.start 
> self
.end
: 
3056             raise ValueError('Date range: "%s" , the start date must be before the end date' % self
) 
3060         """Returns a range that only contains the given day""" 
3061         return cls(day
, day
) 
3063     def __contains__(self
, date
): 
3064         """Check if the date is in the range""" 
3065         if not isinstance(date
, datetime
.date
): 
3066             date 
= date_from_str(date
) 
3067         return self
.start 
<= date 
<= self
.end
 
3070         return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat()) 
3073 def platform_name(): 
3074     """ Returns the platform name as a compat_str """ 
3075     res 
= platform
.platform() 
3076     if isinstance(res
, bytes): 
3077         res 
= res
.decode(preferredencoding()) 
3079     assert isinstance(res
, compat_str
) 
3083 def _windows_write_string(s
, out
): 
3084     """ Returns True if the string was written using special methods, 
3085     False if it has yet to be written out.""" 
3086     # Adapted from http://stackoverflow.com/a/3259271/35070 
3089     import ctypes
.wintypes
 
3097         fileno 
= out
.fileno() 
3098     except AttributeError: 
3099         # If the output stream doesn't have a fileno, it's virtual 
3101     except io
.UnsupportedOperation
: 
3102         # Some strange Windows pseudo files? 
3104     if fileno 
not in WIN_OUTPUT_IDS
: 
3107     GetStdHandle 
= compat_ctypes_WINFUNCTYPE( 
3108         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
3109         ('GetStdHandle', ctypes
.windll
.kernel32
)) 
3110     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
3112     WriteConsoleW 
= compat_ctypes_WINFUNCTYPE( 
3113         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
3114         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
3115         ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
)) 
3116     written 
= ctypes
.wintypes
.DWORD(0) 
3118     GetFileType 
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
)) 
3119     FILE_TYPE_CHAR 
= 0x0002 
3120     FILE_TYPE_REMOTE 
= 0x8000 
3121     GetConsoleMode 
= compat_ctypes_WINFUNCTYPE( 
3122         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
3123         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
3124         ('GetConsoleMode', ctypes
.windll
.kernel32
)) 
3125     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
3127     def not_a_console(handle
): 
3128         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
3130         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
 
3131                 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
3133     if not_a_console(h
): 
3136     def next_nonbmp_pos(s
): 
3138             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
3139         except StopIteration: 
3143         count 
= min(next_nonbmp_pos(s
), 1024) 
3145         ret 
= WriteConsoleW( 
3146             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
3148             raise OSError('Failed to write string') 
3149         if not count
:  # We just wrote a non-BMP character 
3150             assert written
.value 
== 2 
3153             assert written
.value 
> 0 
3154             s 
= s
[written
.value
:] 
3158 def write_string(s
, out
=None, encoding
=None): 
3161     assert type(s
) == compat_str
 
3163     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
3164         if _windows_write_string(s
, out
): 
3167     if ('b' in getattr(out
, 'mode', '') 
3168             or sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
3169         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
3171     elif hasattr(out
, 'buffer'): 
3172         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
3173         byt 
= s
.encode(enc
, 'ignore') 
3174         out
.buffer.write(byt
) 
3180 def bytes_to_intlist(bs
): 
3183     if isinstance(bs
[0], int):  # Python 3 
3186         return [ord(c
) for c 
in bs
] 
3189 def intlist_to_bytes(xs
): 
3192     return compat_struct_pack('%dB' % len(xs
), *xs
) 
3195 # Cross-platform file locking 
3196 if sys
.platform 
== 'win32': 
3197     import ctypes
.wintypes
 
3200     class OVERLAPPED(ctypes
.Structure
): 
3202             ('Internal', ctypes
.wintypes
.LPVOID
), 
3203             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
3204             ('Offset', ctypes
.wintypes
.DWORD
), 
3205             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
3206             ('hEvent', ctypes
.wintypes
.HANDLE
), 
3209     kernel32 
= ctypes
.windll
.kernel32
 
3210     LockFileEx 
= kernel32
.LockFileEx
 
3211     LockFileEx
.argtypes 
= [ 
3212         ctypes
.wintypes
.HANDLE
,     # hFile 
3213         ctypes
.wintypes
.DWORD
,      # dwFlags 
3214         ctypes
.wintypes
.DWORD
,      # dwReserved 
3215         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
3216         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
3217         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
3219     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
3220     UnlockFileEx 
= kernel32
.UnlockFileEx
 
3221     UnlockFileEx
.argtypes 
= [ 
3222         ctypes
.wintypes
.HANDLE
,     # hFile 
3223         ctypes
.wintypes
.DWORD
,      # dwReserved 
3224         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
3225         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
3226         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
3228     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
3229     whole_low 
= 0xffffffff 
3230     whole_high 
= 0x7fffffff 
3232     def _lock_file(f
, exclusive
): 
3233         overlapped 
= OVERLAPPED() 
3234         overlapped
.Offset 
= 0 
3235         overlapped
.OffsetHigh 
= 0 
3236         overlapped
.hEvent 
= 0 
3237         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
3238         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
3239         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
3240                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
3241             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
3243     def _unlock_file(f
): 
3244         assert f
._lock
_file
_overlapped
_p
 
3245         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
3246         if not UnlockFileEx(handle
, 0, 
3247                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
3248             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
3251     # Some platforms, such as Jython, is missing fcntl 
3255         def _lock_file(f
, exclusive
): 
3256             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
3258         def _unlock_file(f
): 
3259             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
3261         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
3263         def _lock_file(f
, exclusive
): 
3264             raise IOError(UNSUPPORTED_MSG
) 
3266         def _unlock_file(f
): 
3267             raise IOError(UNSUPPORTED_MSG
) 
3270 class locked_file(object): 
3271     def __init__(self
, filename
, mode
, encoding
=None): 
3272         assert mode 
in ['r', 'a', 'w'] 
3273         self
.f 
= io
.open(filename
, mode
, encoding
=encoding
) 
3276     def __enter__(self
): 
3277         exclusive 
= self
.mode 
!= 'r' 
3279             _lock_file(self
.f
, exclusive
) 
3285     def __exit__(self
, etype
, value
, traceback
): 
3287             _unlock_file(self
.f
) 
3294     def write(self
, *args
): 
3295         return self
.f
.write(*args
) 
3297     def read(self
, *args
): 
3298         return self
.f
.read(*args
) 
3301 def get_filesystem_encoding(): 
3302     encoding 
= sys
.getfilesystemencoding() 
3303     return encoding 
if encoding 
is not None else 'utf-8' 
3306 def shell_quote(args
): 
3308     encoding 
= get_filesystem_encoding() 
3310         if isinstance(a
, bytes): 
3311             # We may get a filename encoded with 'encodeFilename' 
3312             a 
= a
.decode(encoding
) 
3313         quoted_args
.append(compat_shlex_quote(a
)) 
3314     return ' '.join(quoted_args
) 
3317 def smuggle_url(url
, data
): 
3318     """ Pass additional data in a URL for internal use. """ 
3320     url
, idata 
= unsmuggle_url(url
, {}) 
3322     sdata 
= compat_urllib_parse_urlencode( 
3323         {'__youtubedl_smuggle': json
.dumps(data
)}) 
3324     return url 
+ '#' + sdata
 
3327 def unsmuggle_url(smug_url
, default
=None): 
3328     if '#__youtubedl_smuggle' not in smug_url
: 
3329         return smug_url
, default
 
3330     url
, _
, sdata 
= smug_url
.rpartition('#') 
3331     jsond 
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0] 
3332     data 
= json
.loads(jsond
) 
3336 def format_bytes(bytes): 
3339     if type(bytes) is str: 
3340         bytes = float(bytes) 
3344         exponent 
= int(math
.log(bytes, 1024.0)) 
3345     suffix 
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
] 
3346     converted 
= float(bytes) / float(1024 ** exponent
) 
3347     return '%.2f%s' % (converted
, suffix
) 
3350 def lookup_unit_table(unit_table
, s
): 
3351     units_re 
= '|'.join(re
.escape(u
) for u 
in unit_table
) 
3353         r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
, s
) 
3356     num_str 
= m
.group('num').replace(',', '.') 
3357     mult 
= unit_table
[m
.group('unit')] 
3358     return int(float(num_str
) * mult
) 
3361 def parse_filesize(s
): 
3365     # The lower-case forms are of course incorrect and unofficial, 
3366     # but we support those too 
3383         'megabytes': 1000 ** 2, 
3384         'mebibytes': 1024 ** 2, 
3390         'gigabytes': 1000 ** 3, 
3391         'gibibytes': 1024 ** 3, 
3397         'terabytes': 1000 ** 4, 
3398         'tebibytes': 1024 ** 4, 
3404         'petabytes': 1000 ** 5, 
3405         'pebibytes': 1024 ** 5, 
3411         'exabytes': 1000 ** 6, 
3412         'exbibytes': 1024 ** 6, 
3418         'zettabytes': 1000 ** 7, 
3419         'zebibytes': 1024 ** 7, 
3425         'yottabytes': 1000 ** 8, 
3426         'yobibytes': 1024 ** 8, 
3429     return lookup_unit_table(_UNIT_TABLE
, s
) 
3438     if re
.match(r
'^[\d,.]+$', s
): 
3439         return str_to_int(s
) 
3450     return lookup_unit_table(_UNIT_TABLE
, s
) 
3453 def parse_resolution(s
): 
3457     mobj 
= re
.search(r
'\b(?P<w>\d+)\s*[xXĆ]\s*(?P<h>\d+)\b', s
) 
3460             'width': int(mobj
.group('w')), 
3461             'height': int(mobj
.group('h')), 
3464     mobj 
= re
.search(r
'\b(\d+)[pPiI]\b', s
) 
3466         return {'height': int(mobj
.group(1))} 
3468     mobj 
= re
.search(r
'\b([48])[kK]\b', s
) 
3470         return {'height': int(mobj
.group(1)) * 540} 
3475 def parse_bitrate(s
): 
3476     if not isinstance(s
, compat_str
): 
3478     mobj 
= re
.search(r
'\b(\d+)\s*kbps', s
) 
3480         return int(mobj
.group(1)) 
3483 def month_by_name(name
, lang
='en'): 
3484     """ Return the number of a month by (locale-independently) English name """ 
3486     month_names 
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en']) 
3489         return month_names
.index(name
) + 1 
3494 def month_by_abbreviation(abbrev
): 
3495     """ Return the number of a month by (locale-independently) English 
3499         return [s
[:3] for s 
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1 
3504 def fix_xml_ampersands(xml_str
): 
3505     """Replace all the '&' by '&' in XML""" 
3507         r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', 
3512 def setproctitle(title
): 
3513     assert isinstance(title
, compat_str
) 
3515     # ctypes in Jython is not complete 
3516     # http://bugs.jython.org/issue2148 
3517     if sys
.platform
.startswith('java'): 
3521         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
3525         # LoadLibrary in Windows Python 2.7.13 only expects 
3526         # a bytestring, but since unicode_literals turns 
3527         # every string into a unicode string, it fails. 
3529     title_bytes 
= title
.encode('utf-8') 
3530     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
3531     buf
.value 
= title_bytes
 
3533         libc
.prctl(15, buf
, 0, 0, 0) 
3534     except AttributeError: 
3535         return  # Strange libc, just skip this 
3538 def remove_start(s
, start
): 
3539     return s
[len(start
):] if s 
is not None and s
.startswith(start
) else s
 
3542 def remove_end(s
, end
): 
3543     return s
[:-len(end
)] if s 
is not None and s
.endswith(end
) else s
 
3546 def remove_quotes(s
): 
3547     if s 
is None or len(s
) < 2: 
3549     for quote 
in ('"', "'", ): 
3550         if s
[0] == quote 
and s
[-1] == quote
: 
3555 def url_basename(url
): 
3556     path 
= compat_urlparse
.urlparse(url
).path
 
3557     return path
.strip('/').split('/')[-1] 
3561     return re
.match(r
'https?://[^?#&]+/', url
).group() 
3564 def urljoin(base
, path
): 
3565     if isinstance(path
, bytes): 
3566         path 
= path
.decode('utf-8') 
3567     if not isinstance(path
, compat_str
) or not path
: 
3569     if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
): 
3571     if isinstance(base
, bytes): 
3572         base 
= base
.decode('utf-8') 
3573     if not isinstance(base
, compat_str
) or not re
.match( 
3574             r
'^(?:https?:)?//', base
): 
3576     return compat_urlparse
.urljoin(base
, path
) 
3579 class HEADRequest(compat_urllib_request
.Request
): 
3580     def get_method(self
): 
3584 class PUTRequest(compat_urllib_request
.Request
): 
3585     def get_method(self
): 
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Best-effort conversion of *v* to int, scaled by invscale/scale.

    Returns *default* for None, '' and unconvertible values; when
    *get_attr* is given, the named attribute of *v* is converted instead.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, or return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    elif isinstance(int_str, compat_str):
        # Drop thousands separators and stray '+'/'.' before converting.
        int_str = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Best-effort conversion of *v* to float, scaled by invscale/scale.

    Returns *default* for None and unconvertible values.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Return *v* when it is a genuine bool, otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return *v* with surrounding whitespace stripped when it is a string,
    otherwise *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
def url_or_none(url):
    """Return *url* if it is a text string that looks like an absolute or
    protocol-relative URL, otherwise None."""
    if not url or not isinstance(url, compat_str):
        return None
    return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
3640 def parse_duration(s
): 
3641     if not isinstance(s
, compat_basestring
): 
3646     days
, hours
, mins
, secs
, ms 
= [None] * 5 
3647     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
) 
3649         days
, hours
, mins
, secs
, ms 
= m
.groups() 
3654                     [0-9]+\s*y(?:ears?)?\s* 
3657                     [0-9]+\s*m(?:onths?)?\s* 
3660                     [0-9]+\s*w(?:eeks?)?\s* 
3663                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
3667                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
3670                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
3673                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
3676             days
, hours
, mins
, secs
, ms 
= m
.groups() 
3678             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
) 
3680                 hours
, mins 
= m
.groups() 
3686         duration 
+= float(secs
) 
3688         duration 
+= float(mins
) * 60 
3690         duration 
+= float(hours
) * 60 * 60 
3692         duration 
+= float(days
) * 24 * 60 * 60 
3694         duration 
+= float(ms
) 
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the full filename instead of replacing it.
    """
    name, real_ext = os.path.splitext(filename)
    stem = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
    return '{0}.{1}'.format(stem, ext)
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE: the mutable default is never mutated here, so it is safe.
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from an executable's --version output.

    Falls back to *unrecognized* when no version can be matched.
    """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    return unrecognized
class PagedList(object):
    """Base class for lazily paged result lists; subclasses implement getslice."""

    def getslice(self, start=0, end=None):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3759 class OnDemandPagedList(PagedList
): 
3760     def __init__(self
, pagefunc
, pagesize
, use_cache
=True): 
3761         self
._pagefunc 
= pagefunc
 
3762         self
._pagesize 
= pagesize
 
3763         self
._use
_cache 
= use_cache
 
3767     def getslice(self
, start
=0, end
=None): 
3769         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
3770             firstid 
= pagenum 
* self
._pagesize
 
3771             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
3772             if start 
>= nextfirstid
: 
3777                 page_results 
= self
._cache
.get(pagenum
) 
3778             if page_results 
is None: 
3779                 page_results 
= list(self
._pagefunc
(pagenum
)) 
3781                 self
._cache
[pagenum
] = page_results
 
3784                 start 
% self
._pagesize
 
3785                 if firstid 
<= start 
< nextfirstid
 
3789                 ((end 
- 1) % self
._pagesize
) + 1 
3790                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
3793             if startv 
!= 0 or endv 
is not None: 
3794                 page_results 
= page_results
[startv
:endv
] 
3795             res
.extend(page_results
) 
3797             # A little optimization - if current page is not "full", ie. does 
3798             # not contain page_size videos then we can assume that this page 
3799             # is the last one - there are no more ids on further pages - 
3800             # i.e. no need to query again. 
3801             if len(page_results
) + startv 
< self
._pagesize
: 
3804             # If we got the whole page, but the next page is not interesting, 
3805             # break out early as well 
3806             if end 
== nextfirstid
: 
3811 class InAdvancePagedList(PagedList
): 
3812     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
3813         self
._pagefunc 
= pagefunc
 
3814         self
._pagecount 
= pagecount
 
3815         self
._pagesize 
= pagesize
 
3817     def getslice(self
, start
=0, end
=None): 
3819         start_page 
= start 
// self
._pagesize
 
3821             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
3822         skip_elems 
= start 
- start_page 
* self
._pagesize
 
3823         only_more 
= None if end 
is None else end 
- start
 
3824         for pagenum 
in range(start_page
, end_page
): 
3825             page 
= list(self
._pagefunc
(pagenum
)) 
3827                 page 
= page
[skip_elems
:] 
3829             if only_more 
is not None: 
3830                 if len(page
) < only_more
: 
3831                     only_more 
-= len(page
) 
3833                     page 
= page
[:only_more
] 
def uppercase_escape(s):
    """Decode literal '\\Uxxxxxxxx' escapes in *s* into real characters."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal '\\uxxxx' escapes in *s* into real characters."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() chokes on unicode input, so pre-encode there.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # IDNA-encode the host; percent-escape every other component.
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
def read_batch_urls(batch_fd):
    """Read a batch file object and return its non-comment URLs as a list."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        # Lines starting with '#', ';' or ']' are treated as comments.
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url_query(url, query):
    """Return *url* with the key/value pairs from *query* merged into its
    query string (existing keys are overwritten)."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req* with the given url/data/headers/query overrides applied,
    preserving its HTTP method (HEAD/PUT/GET-style) and timeout."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Serialize *data* as multipart/form-data with the given boundary.

    Raises ValueError when the boundary occurs inside any encoded part.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for k, v in data.items():
        out += b'--' + boundary.encode('ascii') + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            # A caller-supplied boundary that collides with the data is fatal;
            # a random one is simply regenerated and retried.
            if has_specified_boundary:
                raise
            boundary = None

    return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple of keys.

    None values are always skipped; falsy values are skipped too unless
    skip_false_values is False.
    """
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key not in d or d[key] is None or skip_false_values and not d[key]:
                continue
            return d[key]
        return default
    return d.get(key_or_keys, default)
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to *src*, returning the first result that does not
    raise and (when given) matches *expected_type*; otherwise None."""
    if not isinstance(getter, (list, tuple)):
        getter = [getter]
    for get in getter:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(v, expected_type):
                return v
def merge_dicts(*dicts):
    """Merge dicts left-to-right, ignoring None values; an earlier non-empty
    string wins over a later one, but an empty string may be replaced."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if (k not in merged
                    or (isinstance(v, compat_str) and v
                        and isinstance(merged[k], compat_str)
                        and not merged[k])):
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode *string* to compat_str unless it already is one."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4027 TV_PARENTAL_GUIDELINES 
= { 
def parse_age_limit(s):
    """Normalize an age-limit value (int, 'NN+', US rating or TV guideline
    string) to an integer age, or None when unparsable."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving only the payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4063 def js_to_json(code
): 
4064     COMMENT_RE 
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*' 
4065     SKIP_RE 
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
) 
4067         (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16), 
4068         (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8), 
4073         if v 
in ('true', 'false', 'null'): 
4075         elif v
.startswith('/*') or v
.startswith('//') or v 
== ',': 
4078         if v
[0] in ("'", '"'): 
4079             v 
= re
.sub(r
'(?s)\\.|"', lambda m
: { 
4084             }.get(m
.group(0), m
.group(0)), v
[1:-1]) 
4086         for regex
, base 
in INTEGER_TABLE
: 
4087             im 
= re
.match(regex
, v
) 
4089                 i 
= int(im
.group(1), base
) 
4090                 return '"%d":' % i 
if v
.endswith(':') else '%d' % i
 
4094     return re
.sub(r
'''(?sx) 
4095         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| 
4096         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| 
4097         {comment}|,(?={skip}[\]}}])| 
4098         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*| 
4099         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| 
4101         '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
) 
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown quality IDs sort below every known one.
            return -1
    return q
4114 DEFAULT_OUTTMPL 
= '%(title)s-%(id)s.%(ext)s' 
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        # Reserve room for the ellipsis inside the length budget.
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(int(piece) for piece in parts)
def is_outdated_version(version, limit, assume_new=True):
    """Compare *version* against *limit*; unparsable/missing versions are
    treated as new or old according to *assume_new*."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(map(compat_shlex_quote, args))
def error_to_compat_str(err):
    """Stringify an exception safely on both Python 2 and 3."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4161 def mimetype2ext(mt
): 
4167         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as 
4168         # it's the most popular one 
4169         'audio/mpeg': 'mp3', 
4174     _
, _
, res 
= mt
.rpartition('/') 
4175     res 
= res
.split(';')[0].strip().lower() 
4179         'smptett+xml': 'tt', 
4183         'x-mp4-fragmented': 'mp4', 
4184         'x-ms-sami': 'sami', 
4187         'x-mpegurl': 'm3u8', 
4188         'vnd.apple.mpegurl': 'm3u8', 
4192         'vnd.ms-sstr+xml': 'ism', 
4198 def parse_codecs(codecs_str
): 
4199     # http://tools.ietf.org/html/rfc6381 
4202     splited_codecs 
= list(filter(None, map( 
4203         lambda str: str.strip(), codecs_str
.strip().strip(',').split(',')))) 
4204     vcodec
, acodec 
= None, None 
4205     for full_codec 
in splited_codecs
: 
4206         codec 
= full_codec
.split('.')[0] 
4207         if codec 
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): 
4210         elif codec 
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): 
4214             write_string('WARNING: Unknown codec %s\n' % full_codec
, sys
.stderr
) 
4215     if not vcodec 
and not acodec
: 
4216         if len(splited_codecs
) == 2: 
4218                 'vcodec': splited_codecs
[0], 
4219                 'acodec': splited_codecs
[1], 
4223             'vcodec': vcodec 
or 'none', 
4224             'acodec': acodec 
or 'none', 
4229 def urlhandle_detect_ext(url_handle
): 
4230     getheader 
= url_handle
.headers
.get
 
4232     cd 
= getheader('Content-Disposition') 
4234         m 
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
) 
4236             e 
= determine_ext(m
.group('filename'), default_ext
=None) 
4240     return mimetype2ext(getheader('Content-Type')) 
def encode_data_uri(data, mime_type):
    """Build a base64 data: URI from raw bytes and a MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        # No BOM: assume UTF-8 with lenient decoding.
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
4277 def determine_protocol(info_dict
): 
4278     protocol 
= info_dict
.get('protocol') 
4279     if protocol 
is not None: 
4282     url 
= info_dict
['url'] 
4283     if url
.startswith('rtmp'): 
4285     elif url
.startswith('mms'): 
4287     elif url
.startswith('rtsp'): 
4290     ext 
= determine_ext(url
) 
4296     return compat_urllib_parse_urlparse(url
).scheme
 
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Width of each column is the widest stringified cell in it.
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    # Left-align every column except the last, padded one space wider.
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
4307 def _match_one(filter_part
, dct
): 
4308     COMPARISON_OPERATORS 
= { 
4316     operator_rex 
= re
.compile(r
'''(?x)\s* 
4318         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* 
4320             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| 
4321             (?P<quote>["\'])(?P
<quotedstrval
>(?
:\\.|
(?
!(?P
=quote
)|
\\).)+?
)(?P
=quote
)|
 
4322             (?P
<strval
>(?
![0-9.])[a
-z0
-9A
-Z
]*) 
4325         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) 
4326     m = operator_rex.search(filter_part) 
4328         op = COMPARISON_OPERATORS[m.group('op')] 
4329         actual_value = dct.get(m.group('key')) 
4330         if (m.group('quotedstrval') is not None 
4331             or m.group('strval') is not None 
4332             # If the original field is a string and matching comparisonvalue is 
4333             # a number we should respect the origin of the original field 
4334             # and process comparison value as a string (see 
4335             # https://github.com/ytdl-org/youtube-dl/issues/11082). 
4336             or actual_value is not None and m.group('intval') is not None 
4337                 and isinstance(actual_value, compat_str)): 
4338             if m.group('op') not in ('=', '!='): 
4340                     'Operator %s does not support string values!' % m.group('op')) 
4341             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') 
4342             quote = m.group('quote') 
4343             if quote is not None: 
4344                 comparison_value = comparison_value.replace(r'\%s' % quote, quote) 
4347                 comparison_value = int(m.group('intval')) 
4349                 comparison_value = parse_filesize(m.group('intval')) 
4350                 if comparison_value is None: 
4351                     comparison_value = parse_filesize(m.group('intval') + 'B') 
4352                 if comparison_value is None: 
4354                         'Invalid integer value %r in filter part %r' % ( 
4355                             m.group('intval'), filter_part)) 
4356         if actual_value is None: 
4357             return m.group('none_inclusive') 
4358         return op(actual_value, comparison_value) 
4361         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), 
4362         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None), 
4364     operator_rex = re.compile(r'''(?x
)\s
* 
4365         (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+) 
4367         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) 
4368     m = operator_rex.search(filter_part) 
4370         op = UNARY_OPERATORS[m.group('op')] 
4371         actual_value = dct.get(m.group('key')) 
4372         return op(actual_value) 
4374     raise ValueError('Invalid filter part %r' % filter_part) 
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None when the video passes,
    or a human-readable skip reason otherwise."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.3s' or 'HH:MM:SS[.f]') to
    seconds, or None when it cannot be parsed."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4407 def srt_subtitles_timecode(seconds): 
4408     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) 
4411 def dfxp2srt(dfxp_data): 
4413     @param dfxp_data A 
bytes-like 
object containing DFXP data
 
4414     @returns A 
unicode object containing converted SRT data
 
4416     LEGACY_NAMESPACES = ( 
4417         (b'http://www.w3.org/ns/ttml', [ 
4418             b'http://www.w3.org/2004/11/ttaf1', 
4419             b'http://www.w3.org/2006/04/ttaf1', 
4420             b'http://www.w3.org/2006/10/ttaf1', 
4422         (b'http://www.w3.org/ns/ttml#styling', [ 
4423             b'http://www.w3.org/ns/ttml#style', 
4427     SUPPORTED_STYLING = [ 
4436     _x = functools.partial(xpath_with_ns, ns_map={ 
4437         'xml': 'http://www.w3.org/XML/1998/namespace', 
4438         'ttml': 'http://www.w3.org/ns/ttml', 
4439         'tts': 'http://www.w3.org/ns/ttml#styling', 
4445     class TTMLPElementParser(object): 
4447         _unclosed_elements = [] 
4448         _applied_styles = [] 
4450         def start(self, tag, attrib): 
4451             if tag in (_x('ttml:br'), 'br'): 
4454                 unclosed_elements = [] 
4456                 element_style_id = attrib.get('style') 
4458                     style.update(default_style) 
4459                 if element_style_id: 
4460                     style.update(styles.get(element_style_id, {})) 
4461                 for prop in SUPPORTED_STYLING: 
4462                     prop_val = attrib.get(_x('tts:' + prop)) 
4464                         style[prop] = prop_val 
4467                     for k, v in sorted(style.items()): 
4468                         if self._applied_styles and self._applied_styles[-1].get(k) == v: 
4471                             font += ' color="%s"' % v 
4472                         elif k == 'fontSize': 
4473                             font += ' size="%s"' % v 
4474                         elif k == 'fontFamily': 
4475                             font += ' face="%s"' % v 
4476                         elif k == 'fontWeight' and v == 'bold': 
4478                             unclosed_elements.append('b') 
4479                         elif k == 'fontStyle' and v == 'italic': 
4481                             unclosed_elements.append('i') 
4482                         elif k == 'textDecoration' and v == 'underline': 
4484                             unclosed_elements.append('u') 
4486                         self._out += '<font' + font + '>' 
4487                         unclosed_elements.append('font') 
4489                     if self._applied_styles: 
4490                         applied_style.update(self._applied_styles[-1]) 
4491                     applied_style.update(style) 
4492                     self._applied_styles.append(applied_style) 
4493                 self._unclosed_elements.append(unclosed_elements) 
4496             if tag not in (_x('ttml:br'), 'br'): 
4497                 unclosed_elements = self._unclosed_elements.pop() 
4498                 for element in reversed(unclosed_elements): 
4499                     self._out += '</%s>' % element 
4500                 if unclosed_elements and self._applied_styles: 
4501                     self._applied_styles.pop() 
4503         def data(self, data): 
4507             return self._out.strip() 
4509     def parse_node(node): 
4510         target = TTMLPElementParser() 
4511         parser = xml.etree.ElementTree.XMLParser(target=target) 
4512         parser.feed(xml.etree.ElementTree.tostring(node)) 
4513         return parser.close() 
4515     for k, v in LEGACY_NAMESPACES: 
4517             dfxp_data = dfxp_data.replace(ns, k) 
4519     dfxp = compat_etree_fromstring(dfxp_data) 
4521     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') 
4524         raise ValueError('Invalid dfxp/TTML subtitle') 
4528         for style in dfxp.findall(_x('.//ttml:style')): 
4529             style_id = style.get('id') or style.get(_x('xml:id')) 
4532             parent_style_id = style.get('style') 
4534                 if parent_style_id not in styles: 
4537                 styles[style_id] = styles[parent_style_id].copy() 
4538             for prop in SUPPORTED_STYLING: 
4539                 prop_val = style.get(_x('tts:' + prop)) 
4541                     styles.setdefault(style_id, {})[prop] = prop_val 
4547     for p in ('body', 'div'): 
4548         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p]) 
4551         style = styles.get(ele.get('style')) 
4554         default_style.update(style) 
4556     for para, index in zip(paras, itertools.count(1)): 
4557         begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) 
4558         end_time = parse_dfxp_time_expr(para.attrib.get('end')) 
4559         dur = parse_dfxp_time_expr(para.attrib.get('dur')) 
4560         if begin_time is None: 
4565             end_time = begin_time + dur 
4566         out.append('%d\n%s --> %s\n%s\n\n' % ( 
4568             srt_subtitles_timecode(begin_time), 
4569             srt_subtitles_timecode(end_time), 
def cli_option(params, command_option, param):
    """Turn a params entry into ['--option', value], or [] when unset."""
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Turn a boolean params entry into CLI args, optionally joined by
    *separator*; returns [] when the entry is absent."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a flag-style option when the params entry equals expected_value."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=[]):
    """Return the extra-args list stored under *param*, or *default*."""
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
4605 class ISO639Utils(object): 
4606     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
4665         'iw': 'heb',  # Replaced by he in 1989 revision 
4675         'in': 'ind',  # Replaced by id in 1989 revision 
4790         'ji': 'yid',  # Replaced by yi in 1989 revision 
4798     def short2long(cls, code): 
4799         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
4800         return cls._lang_map.get(code[:2]) 
4803     def long2short(cls, code): 
4804         """Convert language code from ISO 639-2/T to ISO 639-1""" 
4805         for short_name, long_name in cls._lang_map.items(): 
4806             if long_name == code: 
# NOTE(review): this extract is garbled — the original file's line numbers are
# embedded in the text, and the `_country_map = {` opener, its closing `}`,
# many country entries, and the `@classmethod` decorator are elided here.
# Lines below are kept verbatim; only comments were added.
4810 class ISO3166Utils(object): 
4811     # From http://data.okfn.org/data/core/country-list 
4813         'AF': 'Afghanistan', 
# NOTE(review): 'Ć' below looks like a mis-encoded 'Å' ('Åland Islands') — confirm encoding.
4814         'AX': 'Ć
land Islands', 
4817         'AS': 'American Samoa', 
4822         'AG': 'Antigua and Barbuda', 
4839         'BO': 'Bolivia, Plurinational State of', 
4840         'BQ': 'Bonaire, Sint Eustatius and Saba', 
4841         'BA': 'Bosnia and Herzegovina', 
4843         'BV': 'Bouvet Island', 
4845         'IO': 'British Indian Ocean Territory', 
4846         'BN': 'Brunei Darussalam', 
4848         'BF': 'Burkina Faso', 
4854         'KY': 'Cayman Islands', 
4855         'CF': 'Central African Republic', 
4859         'CX': 'Christmas Island', 
4860         'CC': 'Cocos (Keeling) Islands', 
4864         'CD': 'Congo, the Democratic Republic of the', 
4865         'CK': 'Cook Islands', 
4867         'CI': 'CƓte d\'Ivoire', 
4872         'CZ': 'Czech Republic', 
4876         'DO': 'Dominican Republic', 
4879         'SV': 'El Salvador', 
4880         'GQ': 'Equatorial Guinea', 
4884         'FK': 'Falkland Islands (Malvinas)', 
4885         'FO': 'Faroe Islands', 
4889         'GF': 'French Guiana', 
4890         'PF': 'French Polynesia', 
4891         'TF': 'French Southern Territories', 
4906         'GW': 'Guinea-Bissau', 
4909         'HM': 'Heard Island and McDonald Islands', 
4910         'VA': 'Holy See (Vatican City State)', 
4917         'IR': 'Iran, Islamic Republic of', 
4920         'IM': 'Isle of Man', 
4930         'KP': 'Korea, Democratic People\'s Republic of', 
4931         'KR': 'Korea, Republic of', 
4934         'LA': 'Lao People\'s Democratic Republic', 
4940         'LI': 'Liechtenstein', 
4944         'MK': 'Macedonia, the Former Yugoslav Republic of', 
4951         'MH': 'Marshall Islands', 
4957         'FM': 'Micronesia, Federated States of', 
4958         'MD': 'Moldova, Republic of', 
4969         'NL': 'Netherlands', 
4970         'NC': 'New Caledonia', 
4971         'NZ': 'New Zealand', 
4976         'NF': 'Norfolk Island', 
4977         'MP': 'Northern Mariana Islands', 
4982         'PS': 'Palestine, State of', 
4984         'PG': 'Papua New Guinea', 
4987         'PH': 'Philippines', 
4991         'PR': 'Puerto Rico', 
4995         'RU': 'Russian Federation', 
4997         'BL': 'Saint BarthƩlemy', 
4998         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
4999         'KN': 'Saint Kitts and Nevis', 
5000         'LC': 'Saint Lucia', 
5001         'MF': 'Saint Martin (French part)', 
5002         'PM': 'Saint Pierre and Miquelon', 
5003         'VC': 'Saint Vincent and the Grenadines', 
5006         'ST': 'Sao Tome and Principe', 
5007         'SA': 'Saudi Arabia', 
5011         'SL': 'Sierra Leone', 
5013         'SX': 'Sint Maarten (Dutch part)', 
5016         'SB': 'Solomon Islands', 
5018         'ZA': 'South Africa', 
5019         'GS': 'South Georgia and the South Sandwich Islands', 
5020         'SS': 'South Sudan', 
5025         'SJ': 'Svalbard and Jan Mayen', 
5028         'CH': 'Switzerland', 
5029         'SY': 'Syrian Arab Republic', 
5030         'TW': 'Taiwan, Province of China', 
5032         'TZ': 'Tanzania, United Republic of', 
5034         'TL': 'Timor-Leste', 
5038         'TT': 'Trinidad and Tobago', 
5041         'TM': 'Turkmenistan', 
5042         'TC': 'Turks and Caicos Islands', 
5046         'AE': 'United Arab Emirates', 
5047         'GB': 'United Kingdom', 
5048         'US': 'United States', 
5049         'UM': 'United States Minor Outlying Islands', 
5053         'VE': 'Venezuela, Bolivarian Republic of', 
5055         'VG': 'Virgin Islands, British', 
5056         'VI': 'Virgin Islands, U.S.', 
5057         'WF': 'Wallis and Futuna', 
5058         'EH': 'Western Sahara', 
# NOTE(review): the `@classmethod` decorator (original line 5064) is elided here.
5065     def short2full(cls, code): 
5066         """Convert an ISO 3166-2 country code to the corresponding full name""" 
# dict.get with no default: returns None for unknown/unmapped codes.
5067         return cls._country_map.get(code.upper()) 
# NOTE(review): garbled extract — original file line numbers are embedded in
# the text, and the `_country_ip_map = {` opener, its closing `}`, several
# entries (gaps in the numbering, e.g. 'BE', 'DE', 'FR', 'US') and the
# `@classmethod` decorator are elided.  Lines kept verbatim; comments added.
5070 class GeoUtils(object): 
5071     # Major IPv4 address blocks per country 
5073         'AD': '46.172.224.0/19', 
5074         'AE': '94.200.0.0/13', 
5075         'AF': '149.54.0.0/17', 
5076         'AG': '209.59.64.0/18', 
5077         'AI': '204.14.248.0/21', 
5078         'AL': '46.99.0.0/16', 
5079         'AM': '46.70.0.0/15', 
5080         'AO': '105.168.0.0/13', 
5081         'AP': '182.50.184.0/21', 
5082         'AQ': '23.154.160.0/24', 
5083         'AR': '181.0.0.0/12', 
5084         'AS': '202.70.112.0/20', 
5085         'AT': '77.116.0.0/14', 
5086         'AU': '1.128.0.0/11', 
5087         'AW': '181.41.0.0/18', 
5088         'AX': '185.217.4.0/22', 
5089         'AZ': '5.197.0.0/16', 
5090         'BA': '31.176.128.0/17', 
5091         'BB': '65.48.128.0/17', 
5092         'BD': '114.130.0.0/16', 
5094         'BF': '102.178.0.0/15', 
5095         'BG': '95.42.0.0/15', 
5096         'BH': '37.131.0.0/17', 
5097         'BI': '154.117.192.0/18', 
5098         'BJ': '137.255.0.0/16', 
5099         'BL': '185.212.72.0/23', 
5100         'BM': '196.12.64.0/18', 
5101         'BN': '156.31.0.0/16', 
5102         'BO': '161.56.0.0/16', 
5103         'BQ': '161.0.80.0/20', 
5104         'BR': '191.128.0.0/12', 
5105         'BS': '24.51.64.0/18', 
5106         'BT': '119.2.96.0/19', 
5107         'BW': '168.167.0.0/16', 
5108         'BY': '178.120.0.0/13', 
5109         'BZ': '179.42.192.0/18', 
5110         'CA': '99.224.0.0/11', 
5111         'CD': '41.243.0.0/16', 
5112         'CF': '197.242.176.0/21', 
5113         'CG': '160.113.0.0/16', 
5114         'CH': '85.0.0.0/13', 
5115         'CI': '102.136.0.0/14', 
5116         'CK': '202.65.32.0/19', 
5117         'CL': '152.172.0.0/14', 
5118         'CM': '102.244.0.0/14', 
5119         'CN': '36.128.0.0/10', 
5120         'CO': '181.240.0.0/12', 
5121         'CR': '201.192.0.0/12', 
5122         'CU': '152.206.0.0/15', 
5123         'CV': '165.90.96.0/19', 
5124         'CW': '190.88.128.0/17', 
5125         'CY': '31.153.0.0/16', 
5126         'CZ': '88.100.0.0/14', 
5128         'DJ': '197.241.0.0/17', 
5129         'DK': '87.48.0.0/12', 
5130         'DM': '192.243.48.0/20', 
5131         'DO': '152.166.0.0/15', 
5132         'DZ': '41.96.0.0/12', 
5133         'EC': '186.68.0.0/15', 
5134         'EE': '90.190.0.0/15', 
5135         'EG': '156.160.0.0/11', 
5136         'ER': '196.200.96.0/20', 
5137         'ES': '88.0.0.0/11', 
5138         'ET': '196.188.0.0/14', 
5139         'EU': '2.16.0.0/13', 
5140         'FI': '91.152.0.0/13', 
5141         'FJ': '144.120.0.0/16', 
5142         'FK': '80.73.208.0/21', 
5143         'FM': '119.252.112.0/20', 
5144         'FO': '88.85.32.0/19', 
5146         'GA': '41.158.0.0/15', 
5148         'GD': '74.122.88.0/21', 
5149         'GE': '31.146.0.0/16', 
5150         'GF': '161.22.64.0/18', 
5151         'GG': '62.68.160.0/19', 
5152         'GH': '154.160.0.0/12', 
5153         'GI': '95.164.0.0/16', 
5154         'GL': '88.83.0.0/19', 
5155         'GM': '160.182.0.0/15', 
5156         'GN': '197.149.192.0/18', 
5157         'GP': '104.250.0.0/19', 
5158         'GQ': '105.235.224.0/20', 
5159         'GR': '94.64.0.0/13', 
5160         'GT': '168.234.0.0/16', 
5161         'GU': '168.123.0.0/16', 
5162         'GW': '197.214.80.0/20', 
5163         'GY': '181.41.64.0/18', 
5164         'HK': '113.252.0.0/14', 
5165         'HN': '181.210.0.0/16', 
5166         'HR': '93.136.0.0/13', 
5167         'HT': '148.102.128.0/17', 
5168         'HU': '84.0.0.0/14', 
5169         'ID': '39.192.0.0/10', 
5170         'IE': '87.32.0.0/12', 
5171         'IL': '79.176.0.0/13', 
5172         'IM': '5.62.80.0/20', 
5173         'IN': '117.192.0.0/10', 
5174         'IO': '203.83.48.0/21', 
5175         'IQ': '37.236.0.0/14', 
5176         'IR': '2.176.0.0/12', 
5177         'IS': '82.221.0.0/16', 
5178         'IT': '79.0.0.0/10', 
5179         'JE': '87.244.64.0/18', 
5180         'JM': '72.27.0.0/17', 
5181         'JO': '176.29.0.0/16', 
5182         'JP': '133.0.0.0/8', 
5183         'KE': '105.48.0.0/12', 
5184         'KG': '158.181.128.0/17', 
5185         'KH': '36.37.128.0/17', 
5186         'KI': '103.25.140.0/22', 
5187         'KM': '197.255.224.0/20', 
5188         'KN': '198.167.192.0/19', 
5189         'KP': '175.45.176.0/22', 
5190         'KR': '175.192.0.0/10', 
5191         'KW': '37.36.0.0/14', 
5192         'KY': '64.96.0.0/15', 
5193         'KZ': '2.72.0.0/13', 
5194         'LA': '115.84.64.0/18', 
5195         'LB': '178.135.0.0/16', 
5196         'LC': '24.92.144.0/20', 
5197         'LI': '82.117.0.0/19', 
5198         'LK': '112.134.0.0/15', 
5199         'LR': '102.183.0.0/16', 
5200         'LS': '129.232.0.0/17', 
5201         'LT': '78.56.0.0/13', 
5202         'LU': '188.42.0.0/16', 
5203         'LV': '46.109.0.0/16', 
5204         'LY': '41.252.0.0/14', 
5205         'MA': '105.128.0.0/11', 
5206         'MC': '88.209.64.0/18', 
5207         'MD': '37.246.0.0/16', 
5208         'ME': '178.175.0.0/17', 
5209         'MF': '74.112.232.0/21', 
5210         'MG': '154.126.0.0/17', 
5211         'MH': '117.103.88.0/21', 
5212         'MK': '77.28.0.0/15', 
5213         'ML': '154.118.128.0/18', 
5214         'MM': '37.111.0.0/17', 
5215         'MN': '49.0.128.0/17', 
5216         'MO': '60.246.0.0/16', 
5217         'MP': '202.88.64.0/20', 
5218         'MQ': '109.203.224.0/19', 
5219         'MR': '41.188.64.0/18', 
5220         'MS': '208.90.112.0/22', 
5221         'MT': '46.11.0.0/16', 
5222         'MU': '105.16.0.0/12', 
5223         'MV': '27.114.128.0/18', 
5224         'MW': '102.70.0.0/15', 
5225         'MX': '187.192.0.0/11', 
5226         'MY': '175.136.0.0/13', 
5227         'MZ': '197.218.0.0/15', 
5228         'NA': '41.182.0.0/16', 
5229         'NC': '101.101.0.0/18', 
5230         'NE': '197.214.0.0/18', 
5231         'NF': '203.17.240.0/22', 
5232         'NG': '105.112.0.0/12', 
5233         'NI': '186.76.0.0/15', 
5234         'NL': '145.96.0.0/11', 
5235         'NO': '84.208.0.0/13', 
5236         'NP': '36.252.0.0/15', 
5237         'NR': '203.98.224.0/19', 
5238         'NU': '49.156.48.0/22', 
5239         'NZ': '49.224.0.0/14', 
5240         'OM': '5.36.0.0/15', 
5241         'PA': '186.72.0.0/15', 
5242         'PE': '186.160.0.0/14', 
5243         'PF': '123.50.64.0/18', 
5244         'PG': '124.240.192.0/19', 
5245         'PH': '49.144.0.0/13', 
5246         'PK': '39.32.0.0/11', 
5247         'PL': '83.0.0.0/11', 
5248         'PM': '70.36.0.0/20', 
5249         'PR': '66.50.0.0/16', 
5250         'PS': '188.161.0.0/16', 
5251         'PT': '85.240.0.0/13', 
5252         'PW': '202.124.224.0/20', 
5253         'PY': '181.120.0.0/14', 
5254         'QA': '37.210.0.0/15', 
5255         'RE': '102.35.0.0/16', 
5256         'RO': '79.112.0.0/13', 
5257         'RS': '93.86.0.0/15', 
5258         'RU': '5.136.0.0/13', 
5259         'RW': '41.186.0.0/16', 
5260         'SA': '188.48.0.0/13', 
5261         'SB': '202.1.160.0/19', 
5262         'SC': '154.192.0.0/11', 
5263         'SD': '102.120.0.0/13', 
5264         'SE': '78.64.0.0/12', 
5265         'SG': '8.128.0.0/10', 
5266         'SI': '188.196.0.0/14', 
5267         'SK': '78.98.0.0/15', 
5268         'SL': '102.143.0.0/17', 
5269         'SM': '89.186.32.0/19', 
5270         'SN': '41.82.0.0/15', 
5271         'SO': '154.115.192.0/18', 
5272         'SR': '186.179.128.0/17', 
5273         'SS': '105.235.208.0/21', 
5274         'ST': '197.159.160.0/19', 
5275         'SV': '168.243.0.0/16', 
5276         'SX': '190.102.0.0/20', 
5278         'SZ': '41.84.224.0/19', 
5279         'TC': '65.255.48.0/20', 
5280         'TD': '154.68.128.0/19', 
5281         'TG': '196.168.0.0/14', 
5282         'TH': '171.96.0.0/13', 
5283         'TJ': '85.9.128.0/18', 
5284         'TK': '27.96.24.0/21', 
5285         'TL': '180.189.160.0/20', 
5286         'TM': '95.85.96.0/19', 
5287         'TN': '197.0.0.0/11', 
5288         'TO': '175.176.144.0/21', 
5289         'TR': '78.160.0.0/11', 
5290         'TT': '186.44.0.0/15', 
5291         'TV': '202.2.96.0/19', 
5292         'TW': '120.96.0.0/11', 
5293         'TZ': '156.156.0.0/14', 
5294         'UA': '37.52.0.0/14', 
5295         'UG': '102.80.0.0/13', 
5297         'UY': '167.56.0.0/13', 
5298         'UZ': '84.54.64.0/18', 
5299         'VA': '212.77.0.0/19', 
5300         'VC': '207.191.240.0/21', 
5301         'VE': '186.88.0.0/13', 
5302         'VG': '66.81.192.0/20', 
5303         'VI': '146.226.0.0/16', 
5304         'VN': '14.160.0.0/11', 
5305         'VU': '202.80.32.0/20', 
5306         'WF': '117.20.32.0/21', 
5307         'WS': '202.4.32.0/19', 
5308         'YE': '134.35.0.0/16', 
5309         'YT': '41.242.116.0/22', 
5310         'ZA': '41.0.0.0/11', 
5311         'ZM': '102.144.0.0/13', 
5312         'ZW': '102.177.192.0/18', 
# NOTE(review): `@classmethod` decorator (original ~5315) elided here.
# Picks a random IPv4 address from a country block (2-letter code) or from a
# CIDR block passed directly.
5316     def random_ipv4(cls, code_or_block): 
5317         if len(code_or_block) == 2: 
5318             block = cls._country_ip_map.get(code_or_block.upper()) 
# NOTE(review): the `if not block: return None` / `else:` branch (original
# 5319-5321) is elided in this extract — confirm against upstream.
5322             block = code_or_block 
5323         addr, preflen = block.split('/') 
# inet_aton gives the address in network (big-endian) order; '>L' unpack
# turns it into the block's lowest address as an integer.
5324         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] 
# OR-ing in the host-bit mask yields the block's highest address.
5325         addr_max = addr_min | (0xffffffff >> int(preflen)) 
5326         return compat_str(socket.inet_ntoa( 
5327             compat_struct_pack('!L', random.randint(addr_min, addr_max)))) 
# Proxy handler that lets each request override the proxy via a synthetic
# 'Ytdl-request-proxy' header; '__noproxy__' disables proxying entirely.
# NOTE(review): garbled extract — some lines are elided (numbering jumps);
# code kept verbatim, only comments added.
5330 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): 
5331     def __init__(self, proxies=None): 
5332         # Set default handlers 
5333         for type in ('http', 'https'): 
# The lambda binds `type` and `meth` as default arguments on purpose: this
# avoids the classic late-binding-closure bug inside the loop.
5334             setattr(self, '%s_open' % type, 
5335                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: 
5336                         meth(r, proxy, type)) 
5337         compat_urllib_request.ProxyHandler.__init__(self, proxies) 
5339     def proxy_open(self, req, proxy, type): 
5340         req_proxy = req.headers.get('Ytdl-request-proxy') 
5341         if req_proxy is not None: 
# NOTE(review): the assignment taking the per-request proxy (original 5342,
# presumably `proxy = req_proxy`) is elided in this extract — confirm.
5343             del req.headers['Ytdl-request-proxy'] 
5345         if proxy == '__noproxy__': 
5346             return None  # No Proxy 
5347         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): 
5348             req.add_header('Ytdl-socks-proxy', proxy) 
5349             # youtube-dl's http/https handlers do wrapping the socket with socks 
# NOTE(review): a `return None` (original 5350) appears elided here.
5351         return compat_urllib_request.ProxyHandler.proxy_open( 
5352             self, req, proxy, type) 
5355 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 
5356 # released into Public Domain 
5357 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 
5359 def long_to_bytes(n, blocksize=0): 
5360     """long_to_bytes(n:long, blocksize:int) : string 
5361     Convert a long integer to a byte string. 
5363     If optional blocksize is given and greater than zero, pad the front of the 
5364     byte string with binary zeros so that the length is a multiple of 
# NOTE(review): garbled extract — the end of the docstring, the accumulator
# initialisation and the `while n > 0:` loop header of the PyCrypto original
# are elided here.  Code kept verbatim; comments added.
5367     # after much testing, this algorithm was deemed to be the fastest 
# Prepend 32 bits at a time, big-endian ('>I'), until n is consumed.
5371         s = compat_struct_pack('>I', n & 0xffffffff) + s 
5373     # strip off leading zeros 
5374     for i in range(len(s)): 
5375         if s[i] != b'\000'[0]: 
5378         # only happens when n == 0 
5382     # add back some pad bytes.  this could be done more efficiently w.r.t. the 
5383     # de-padding being done above, but sigh... 
# Left-pad with NUL bytes so the final length is a multiple of blocksize.
5384     if blocksize > 0 and len(s) % blocksize: 
5385         s = (blocksize - len(s) % blocksize) * b'\000' + s 
5389 def bytes_to_long(s): 
5390     """bytes_to_long(string) : long 
5391     Convert a byte string to a long integer. 
5393     This is (essentially) the inverse of long_to_bytes(). 
# NOTE(review): garbled extract — the accumulator/length initialisation, the
# `if length % 4:` guard and the final `return` of the PyCrypto original are
# elided here.  Code kept verbatim; comments added.
# Left-pad with NUL bytes so the length is a multiple of 4 ...
5398         extra = (4 - length % 4) 
5399         s = b'\000' * extra + s 
5400         length = length + extra 
# ... then fold the string into an integer 32 big-endian bits at a time.
5401     for i in range(0, length, 4): 
5402         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0] 
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # OHDave's scheme treats the data as a little-endian unsigned integer,
    # hence the byte reversal before hexlify.
    little_endian = data[::-1]
    message = int(binascii.hexlify(little_endian), 16)
    # Textbook RSA: ciphertext = message ** e mod N, rendered as lowercase hex.
    return '%x' % pow(message, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data

    Raises ValueError when the data cannot fit: PKCS#1 v1.5 needs at least
    11 bytes of overhead (2 header bytes, >= 8 padding bytes, 1 separator).
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS #1 v2.2) section 7.2.1: the padding string PS must
    # consist of *nonzero* pseudo-random octets — the first zero octet after
    # the 0x02 block type marks the end of the padding, so a zero inside PS
    # (as randint(0, 254) could produce) would truncate the message on
    # decryption.  Hence randint(1, 255).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]

    # Encryption block layout: 00 || 02 || PS || 00 || data
    return [0, 2] + pseudo_random + [0] + data
 
def encode_base_n(num, n, table=None):
    """Render the non-negative integer *num* in base *n*, using *table* as the
    digit alphabet (defaults to 0-9a-zA-Z truncated to *n* symbols)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect least-significant digits first, then reverse once at the end.
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
# Unpacks JavaScript obfuscated with Dean Edwards' P.A.C.K.E.R.: rebuilds the
# symbol table (count -> identifier) and substitutes tokens back into the code.
# NOTE(review): garbled extract — PACKED_CODES_RE is defined elsewhere in the
# file, and several lines (int conversions of base/count, the symbol_table
# initialisation, the while-loop header and the final re.sub call wrapper) are
# elided here.  Code kept verbatim; comments added.
# NOTE(review): 'obfucasted_code' is a long-standing upstream misspelling of
# 'obfuscated_code' — left unchanged here.
5456 def decode_packed_codes(code
): 
5457     mobj 
= re
.search(PACKED_CODES_RE
, code
) 
5458     obfucasted_code
, base
, count
, symbols 
= mobj
.groups() 
5461     symbols 
= symbols
.split('|') 
# Map each index, encoded in base `base`, to its replacement symbol; an empty
# symbol falls back to the encoded index itself.
5466         base_n_count 
= encode_base_n(count
, base
) 
5467         symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
 
# Replace every word token via the symbol table.
5470         r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)], 
def caesar(s, alphabet, shift):
    """Shift each character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); characters outside *alphabet* pass through
    unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            shifted.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            shifted.append(ch)
    return ''.join(shifted)
def rot47(s):
    """Apply the ROT47 cipher: a Caesar shift of 47 over the 94 printable
    ASCII characters '!' through '~'."""
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted value",...') into
    a dict; surrounding double quotes are stripped from quoted values."""
    attrs = {}
    pattern = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    for key, val in re.findall(pattern, attrib):
        # Quoted values may themselves contain commas, so strip the quotes
        # only after matching.
        attrs[key] = val[1:-1] if val.startswith('"') else val
    return attrs
def urshift(val, n):
    """Unsigned 32-bit right shift of *val* by *n* bits (like JavaScript's
    ``>>>`` operator)."""
    if val >= 0:
        return val >> n
    # Reinterpret a negative value as its unsigned 32-bit two's-complement
    # equivalent before shifting.
    return (val + 0x100000000) >> n
5500 # Based on png2str() written by @gdkchan and improved by @yokrysty 
5501 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706 
# Decodes a (zlib-compressed, filtered) PNG into (width, height, pixels).
# NOTE(review): badly garbled extract — statements are split across lines and
# many lines are elided (the chunk-reading while loop, the chunks list, the
# stride computation, parts of the Paeth predictor).  Code kept verbatim;
# comments added only between complete fragments.
5502 def decode_png(png_data
): 
5503     # Reference: https://www.w3.org/TR/PNG/ 
5504     header 
= png_data
[8:] 
# Validate the 8-byte PNG signature and that the first chunk is IHDR.
5506     if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR': 
5507         raise IOError('Not a valid PNG file.') 
# Big-endian unsigned int formats keyed by byte width.
5509     int_map 
= {1: '>B', 2: '>H', 4: '>I'} 
5510     unpack_integer 
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0] 
# Chunk layout: 4-byte length, 4-byte type, `length` data bytes, 4-byte CRC.
5515         length 
= unpack_integer(header
[:4]) 
5518         chunk_type 
= header
[:4] 
5521         chunk_data 
= header
[:length
] 
5522         header 
= header
[length
:] 
5524         header 
= header
[4:]  # Skip CRC 
# IHDR is always the first chunk; its first 8 bytes are width and height.
5532     ihdr 
= chunks
[0]['data'] 
5534     width 
= unpack_integer(ihdr
[:4]) 
5535     height 
= unpack_integer(ihdr
[4:8]) 
# Concatenate the payload of every IDAT chunk before inflating.
5539     for chunk 
in chunks
: 
5540         if chunk
['type'] == b
'IDAT': 
5541             idat 
+= chunk
['data'] 
5544         raise IOError('Unable to read PNG data.') 
5546     decompressed_data 
= bytearray(zlib
.decompress(idat
)) 
5551     def _get_pixel(idx
): 
# Each scanline is 1 filter-type byte followed by `stride` bytes of data.
5556     for y 
in range(height
): 
5557         basePos 
= y 
* (1 + stride
) 
5558         filter_type 
= decompressed_data
[basePos
] 
5562         pixels
.append(current_row
) 
5564         for x 
in range(stride
): 
5565             color 
= decompressed_data
[1 + basePos 
+ x
] 
5566             basex 
= y 
* stride 
+ x
 
# Reconstruct neighbours for the filters: left (same row) and up (row above).
5571                 left 
= _get_pixel(basex 
- 3) 
5573                 up 
= _get_pixel(basex 
- stride
) 
# Undo the PNG scanline filters (types 1-4); all arithmetic is modulo 256.
5575             if filter_type 
== 1:  # Sub 
5576                 color 
= (color 
+ left
) & 0xff 
5577             elif filter_type 
== 2:  # Up 
5578                 color 
= (color 
+ up
) & 0xff 
5579             elif filter_type 
== 3:  # Average 
5580                 color 
= (color 
+ ((left 
+ up
) >> 1)) & 0xff 
5581             elif filter_type 
== 4:  # Paeth 
5587                     c 
= _get_pixel(basex 
- stride 
- 3) 
# Paeth predictor: pick whichever of a/b/c is closest to the estimate.
5595                 if pa 
<= pb 
and pa 
<= pc
: 
5596                     color 
= (color 
+ a
) & 0xff 
5598                     color 
= (color 
+ b
) & 0xff 
5600                     color 
= (color 
+ c
) & 0xff 
5602             current_row
.append(color
) 
5604     return width
, height
, pixels
 
# Sets extended attribute `key` = `value` (bytes) on file `path`, trying in
# order: the pyxattr/xattr Python modules, NTFS Alternate Data Streams on
# Windows, then the setfattr/xattr command-line tools.
# NOTE(review): badly garbled extract — statements are split across lines and
# the try/except ImportError scaffolding (including `import xattr` itself) is
# elided.  Code kept verbatim; comments added only between complete fragments.
5607 def write_xattr(path
, key
, value
): 
5608     # This mess below finds the best xattr tool for the job 
5610         # try the pyxattr module... 
# Both the 'pyxattr' and 'xattr' PyPI packages import as `xattr`; only
# pyxattr exposes a `set` function, which is how they are told apart.
5613         if hasattr(xattr
, 'set'):  # pyxattr 
5614             # Unicode arguments are not supported in python-pyxattr until 
5616             # See https://github.com/ytdl-org/youtube-dl/issues/5498 
5617             pyxattr_required_version 
= '0.5.0' 
5618             if version_tuple(xattr
.__version
__) < version_tuple(pyxattr_required_version
): 
5619                 # TODO: fallback to CLI tools 
5620                 raise XAttrUnavailableError( 
5621                     'python-pyxattr is detected but is too old. ' 
5622                     'youtube-dl requires %s or above while your version is %s. ' 
5623                     'Falling back to other xattr implementations' % ( 
5624                         pyxattr_required_version
, xattr
.__version
__)) 
5626             setxattr 
= xattr
.set 
5628             setxattr 
= xattr
.setxattr
 
# Translate low-level OS errors into the project's XAttrMetadataError.
5631             setxattr(path
, key
, value
) 
5632         except EnvironmentError as e
: 
5633             raise XAttrMetadataError(e
.errno
, e
.strerror
) 
5636         if compat_os_name 
== 'nt': 
5637             # Write xattrs to NTFS Alternate Data Streams: 
5638             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 
5639             assert ':' not in key
 
5640             assert os
.path
.exists(path
) 
# ADS path syntax: "file:streamname".
5642             ads_fn 
= path 
+ ':' + key
 
5644                 with open(ads_fn
, 'wb') as f
: 
5646             except EnvironmentError as e
: 
5647                 raise XAttrMetadataError(e
.errno
, e
.strerror
) 
# Fall back to external CLI tools (GNU setfattr, or the xattr binary).
5649             user_has_setfattr 
= check_executable('setfattr', ['--version']) 
5650             user_has_xattr 
= check_executable('xattr', ['-h']) 
5652             if user_has_setfattr 
or user_has_xattr
: 
5654                 value 
= value
.decode('utf-8') 
5655                 if user_has_setfattr
: 
5656                     executable 
= 'setfattr' 
5657                     opts 
= ['-n', key
, '-v', value
] 
5658                 elif user_has_xattr
: 
5659                     executable 
= 'xattr' 
5660                     opts 
= ['-w', key
, value
] 
5662                 cmd 
= ([encodeFilename(executable
, True)] 
5663                        + [encodeArgument(o
) for o 
in opts
] 
5664                        + [encodeFilename(path
, True)]) 
5667                     p 
= subprocess
.Popen( 
5668                         cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
) 
5669                 except EnvironmentError as e
: 
5670                     raise XAttrMetadataError(e
.errno
, e
.strerror
) 
5671                 stdout
, stderr 
= p
.communicate() 
5672                 stderr 
= stderr
.decode('utf-8', 'replace') 
# Non-zero exit from the CLI tool means the attribute was not written.
5673                 if p
.returncode 
!= 0: 
5674                     raise XAttrMetadataError(p
.returncode
, stderr
) 
5677                 # On Unix, and can't find pyxattr, setfattr, or xattr. 
5678                 if sys
.platform
.startswith('linux'): 
5679                     raise XAttrUnavailableError( 
5680                         "Couldn't find a tool to set the xattrs. " 
5681                         "Install either the python 'pyxattr' or 'xattr' " 
5682                         "modules, or the GNU 'attr' package " 
5683                         "(which contains the 'setfattr' tool).") 
5685                     raise XAttrUnavailableError( 
5686                         "Couldn't find a tool to set the xattrs. " 
5687                         "Install either the python 'xattr' module, " 
5688                         "or the 'xattr' binary.") 
5691 def random_birthday(year_field
, month_field
, day_field
): 
5692     start_date 
= datetime
.date(1950, 1, 1) 
5693     end_date 
= datetime
.date(1995, 12, 31) 
5694     offset 
= random
.randint(0, (end_date 
- start_date
).days
) 
5695     random_date 
= start_date 
+ datetime
.timedelta(offset
) 
5697         year_field
: str(random_date
.year
), 
5698         month_field
: str(random_date
.month
), 
5699         day_field
: str(random_date
.day
),