4 from __future__ 
import unicode_literals
 
  33 import xml
.etree
.ElementTree
 
  40     compat_etree_fromstring
, 
  42     compat_html_entities_html5
, 
  48     compat_socket_create_connection
, 
  54     compat_urllib_parse_urlencode
, 
  55     compat_urllib_parse_urlparse
, 
  56     compat_urllib_parse_unquote_plus
, 
  57     compat_urllib_request
, 
  68 def register_socks_protocols(): 
  69     # "Register" SOCKS protocols 
  70     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  71     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  72     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  73         if scheme 
not in compat_urlparse
.uses_netloc
: 
  74             compat_urlparse
.uses_netloc
.append(scheme
) 
  77 # This is not clearly defined otherwise 
  78 compiled_regex_type 
= type(re
.compile('')) 
  81     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 
  82     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  83     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  84     'Accept-Encoding': 'gzip, deflate', 
  85     'Accept-Language': 'en-us,en;q=0.5', 
# English month names in calendar order (index 0 = January); used by the
# date-parsing helpers and as the 'en' entry of the month-name tables.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
  96     'en': ENGLISH_MONTH_NAMES
, 
  98         'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 
  99         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], 
 103     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
 104     'flv', 'f4v', 'f4a', 'f4b', 
 105     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 106     'mkv', 'mka', 'mk3d', 
 115     'f4f', 'f4m', 'm3u8', 'smil') 
# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration: single-character
# replacements come from the plain strings, while multi-character
# replacements ('AE', 'OE', 'ss', ...) are passed as one-element lists so
# zip() pairs them with a single source character.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
 136     '%Y-%m-%d %H:%M:%S.%f', 
 139     '%Y-%m-%dT%H:%M:%SZ', 
 140     '%Y-%m-%dT%H:%M:%S.%fZ', 
 141     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 143     '%Y-%m-%dT%H:%M:%S.%f', 
 146     '%b %d %Y at %H:%M:%S', 
 149 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 150 DATE_FORMATS_DAY_FIRST
.extend([ 
 159 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 160 DATE_FORMATS_MONTH_FIRST
.extend([ 
# Matches the argument list of a "P.A.C.K.E.R."-style packed JavaScript
# payload: ('payload', radix, count, 'keywords'.split('|')).
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 171 def preferredencoding(): 
 172     """Get preferred encoding. 
 174     Returns the best encoding scheme for the system, based on 
 175     locale.getpreferredencoding() and some further tweaks. 
 178         pref 
= locale
.getpreferredencoding() 
 186 def write_json_file(obj
, fn
): 
 187     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 189     fn 
= encodeFilename(fn
) 
 190     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 191         encoding 
= get_filesystem_encoding() 
 192         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 193         # will fail if the filename contains non ascii characters unless we 
 194         # use a unicode object 
 195         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 196         # the same for os.path.dirname 
 197         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 199         path_basename 
= os
.path
.basename
 
 200         path_dirname 
= os
.path
.dirname
 
 204         'prefix': path_basename(fn
) + '.', 
 205         'dir': path_dirname(fn
), 
 209     # In Python 2.x, json.dump expects a bytestream. 
 210     # In Python 3.x, it writes to a character stream 
 211     if sys
.version_info 
< (3, 0): 
 219     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 224         if sys
.platform 
== 'win32': 
 225             # Need to remove existing file on Windows, else os.rename raises 
 226             # WindowsError or FileExistsError. 
 231         os
.rename(tf
.name
, fn
) 
 240 if sys
.version_info 
>= (2, 7): 
 241     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 242         """ Find the xpath xpath[@key=val] """ 
 243         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 244         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 245         return node
.find(expr
) 
 247     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 248         for f 
in node
.findall(compat_xpath(xpath
)): 
 249             if key 
not in f
.attrib
: 
 251             if val 
is None or f
.attrib
.get(key
) == val
: 
 255 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 256 # the namespace parameter 
 259 def xpath_with_ns(path
, ns_map
): 
 260     components 
= [c
.split(':') for c 
in path
.split('/')] 
 264             replaced
.append(c
[0]) 
 267             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 268     return '/'.join(replaced
) 
 271 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 272     def _find_xpath(xpath
): 
 273         return node
.find(compat_xpath(xpath
)) 
 275     if isinstance(xpath
, (str, compat_str
)): 
 276         n 
= _find_xpath(xpath
) 
 284         if default 
is not NO_DEFAULT
: 
 287             name 
= xpath 
if name 
is None else name
 
 288             raise ExtractorError('Could not find XML element %s' % name
) 
 294 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 295     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 296     if n 
is None or n 
== default
: 
 299         if default 
is not NO_DEFAULT
: 
 302             name 
= xpath 
if name 
is None else name
 
 303             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 309 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 310     n 
= find_xpath_attr(node
, xpath
, key
) 
 312         if default 
is not NO_DEFAULT
: 
 315             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 316             raise ExtractorError('Could not find XML attribute %s' % name
) 
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # NOTE: the parameter name shadows the builtin id(); kept as-is for
    # backward compatibility with callers passing it by keyword.
    return get_element_by_attribute('id', id, html)
 327 def get_element_by_class(class_name
, html
): 
 328     return get_element_by_attribute( 
 329         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 330         html, escape_value=False) 
 333 def get_element_by_attribute(attribute, value, html, escape_value=True): 
 334     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 336     value = re.escape(value) if escape_value else value 
 338     m = re.search(r'''(?xs) 
 340          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'))*?
 
 342          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'))*? 
 346     ''' % (re.escape(attribute), value), html) 
 350     res = m.group('content
') 
 352     if res.startswith('"') or res.startswith("'"): 
 355     return unescapeHTML(res) 
 358 class HTMLAttributeParser(compat_HTMLParser): 
 359     """Trivial HTML parser to gather the attributes for a single element""" 
 362         compat_HTMLParser.__init__(self) 
 364     def handle_starttag(self, tag, attrs): 
 365         self.attrs = dict(attrs) 
 368 def extract_attributes(html_element): 
 369     """Given a string for an HTML element such as 
 371          a="foo" B="bar" c="&98;az" d=boz 
 372          empty= noval entity="&" 
 375     Decode and return a dictionary of attributes. 
 377         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 378         'empty
': '', 'noval
': None, 'entity
': '&', 
 379         'sq
': '"', 'dq': '\'' 
 381     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 382     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 384     parser = HTMLAttributeParser() 
 385     parser.feed(html_element) 
 390 def clean_html(html): 
 391     """Clean an HTML snippet into a readable string""" 
 393     if html is None:  # Convenience for sanitizing descriptions etc. 
 397     html = html.replace('\n', ' ') 
 398     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 399     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 401     html = re.sub('<.*?>', '', html) 
 402     # Replace html entities 
 403     html = unescapeHTML(html) 
 407 def sanitize_open(filename, open_mode): 
 408     """Try to open the given filename, and slightly tweak it if this fails. 
 410     Attempts to open the given filename. If this fails, it tries to change 
 411     the filename slightly, step by step, until it's either able to open it 
 412     or it fails and raises a final exception, like the standard open() 
 415     It returns the tuple (stream, definitive_file_name). 
 419             if sys.platform == 'win32': 
 421                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 422             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 423         stream = open(encodeFilename(filename), open_mode) 
 424         return (stream, filename) 
 425     except (IOError, OSError) as err: 
 426         if err.errno in (errno.EACCES,): 
 429         # In case of error, try to remove win32 forbidden chars 
 430         alt_filename = sanitize_path(filename) 
 431         if alt_filename == filename: 
 434             # An exception here should be caught in the caller 
 435             stream = open(encodeFilename(alt_filename), open_mode) 
 436             return (stream, alt_filename) 
 439 def timeconvert(timestr): 
 440     """Convert RFC 2822 defined time string into system timestamp""" 
 442     timetuple = email.utils.parsedate_tz(timestr) 
 443     if timetuple is not None: 
 444         timestamp = email.utils.mktime_tz(timetuple) 
 448 def sanitize_filename(s, restricted=False, is_id=False): 
 449     """Sanitizes a string so it could be used as part of a filename. 
 450     If restricted is set, use a stricter subset of allowed characters. 
 451     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible 
 453     def replace_insane(char): 
 454         if restricted and char in ACCENT_CHARS: 
 455             return ACCENT_CHARS[char] 
 456         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 459             return '' if restricted else '\'' 
 461             return '_
-' if restricted else ' -' 
 462         elif char in '\\/|
*<>': 
 464         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 466         if restricted 
and ord(char
) > 127: 
 471     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 472     result 
= ''.join(map(replace_insane
, s
)) 
 474         while '__' in result
: 
 475             result 
= result
.replace('__', '_') 
 476         result 
= result
.strip('_') 
 477         # Common case of "Foreign band name - English song title" 
 478         if restricted 
and result
.startswith('-_'): 
 480         if result
.startswith('-'): 
 481             result 
= '_' + result
[len('-'):] 
 482         result 
= result
.lstrip('.') 
 488 def sanitize_path(s
): 
 489     """Sanitizes and normalizes path on Windows""" 
 490     if sys
.platform 
!= 'win32': 
 492     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 493     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 494         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 495     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 499         path_part 
if path_part 
in ['.', '..'] else re
.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part
) 
 500         for path_part 
in norm_path
] 
 502         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 503     return os
.path
.join(*sanitized_path
) 
 506 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of 
 507 # unwanted failures due to missing protocol 
 508 def sanitize_url(url
): 
 509     return 'http:%s' % url 
if url
.startswith('//') else url
 
def sanitized_Request(url, *args, **kwargs):
    # Build a urllib Request after passing the URL through sanitize_url()
    # (which prepends 'http:' to protocol-relative '//...' URLs).
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 516 def orderedSet(iterable
): 
 517     """ Remove all duplicates from the input iterable """ 
 525 def _htmlentity_transform(entity_with_semicolon
): 
 526     """Transforms an HTML entity to a character.""" 
 527     entity 
= entity_with_semicolon
[:-1] 
 529     # Known non-numeric HTML entity 
 530     if entity 
in compat_html_entities
.name2codepoint
: 
 531         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 533     # TODO: HTML5 allows entities without a semicolon. For example, 
 534     # 'Éric' should be decoded as 'Éric'. 
 535     if entity_with_semicolon 
in compat_html_entities_html5
: 
 536         return compat_html_entities_html5
[entity_with_semicolon
] 
 538     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 540         numstr 
= mobj
.group(1) 
 541         if numstr
.startswith('x'): 
 543             numstr 
= '0%s' % numstr
 
 546         # See https://github.com/rg3/youtube-dl/issues/7518 
 548             return compat_chr(int(numstr
, base
)) 
 552     # Unknown entity in name, return its literal representation 
 553     return '&%s;' % entity
 
 559     assert type(s
) == compat_str
 
 562         r
'&([^;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 565 def get_subprocess_encoding(): 
 566     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 567         # For subprocess calls, encode with locale encoding 
 568         # Refer to http://stackoverflow.com/a/9951851/35070 
 569         encoding 
= preferredencoding() 
 571         encoding 
= sys
.getfilesystemencoding() 
 577 def encodeFilename(s
, for_subprocess
=False): 
 579     @param s The name of the file 
 582     assert type(s
) == compat_str
 
 584     # Python 3 has a Unicode API 
 585     if sys
.version_info 
>= (3, 0): 
 588     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 589     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 590     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 591     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 594     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 595     if sys
.platform
.startswith('java'): 
 598     return s
.encode(get_subprocess_encoding(), 'ignore') 
 601 def decodeFilename(b
, for_subprocess
=False): 
 603     if sys
.version_info 
>= (3, 0): 
 606     if not isinstance(b
, bytes): 
 609     return b
.decode(get_subprocess_encoding(), 'ignore') 
 612 def encodeArgument(s
): 
 613     if not isinstance(s
, compat_str
): 
 614         # Legacy code that uses byte strings 
 615         # Uncomment the following line after fixing all post processors 
 616         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 617         s 
= s
.decode('ascii') 
 618     return encodeFilename(s
, True) 
def decodeArgument(b):
    # Inverse of encodeArgument(): decode a subprocess argument via
    # decodeFilename(..., for_subprocess=True).
    return decodeFilename(b, True)
 625 def decodeOption(optval
): 
 628     if isinstance(optval
, bytes): 
 629         optval 
= optval
.decode(preferredencoding()) 
 631     assert isinstance(optval
, compat_str
) 
 635 def formatSeconds(secs
): 
 637         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 639         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 644 def make_HTTPS_handler(params
, **kwargs
): 
 645     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 646     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 647         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 648         if opts_no_check_certificate
: 
 649             context
.check_hostname 
= False 
 650             context
.verify_mode 
= ssl
.CERT_NONE
 
 652             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 655             # (create_default_context present but HTTPSHandler has no context=) 
 658     if sys
.version_info 
< (3, 2): 
 659         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 661         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 662         context
.verify_mode 
= (ssl
.CERT_NONE
 
 663                                if opts_no_check_certificate
 
 664                                else ssl
.CERT_REQUIRED
) 
 665         context
.set_default_verify_paths() 
 666         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 669 def bug_reports_message(): 
 670     if ytdl_is_updateable(): 
 671         update_cmd 
= 'type  youtube-dl -U  to update' 
 673         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 674     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 675     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 676     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
 680 class ExtractorError(Exception): 
 681     """Error during info extraction.""" 
 683     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 684         """ tb, if given, is the original traceback (so that it can be printed out). 
 685         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 688         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 690         if video_id 
is not None: 
 691             msg 
= video_id 
+ ': ' + msg
 
 693             msg 
+= ' (caused by %r)' % cause
 
 695             msg 
+= bug_reports_message() 
 696         super(ExtractorError
, self
).__init
__(msg
) 
 699         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 701         self
.video_id 
= video_id
 
 703     def format_traceback(self
): 
 704         if self
.traceback 
is None: 
 706         return ''.join(traceback
.format_tb(self
.traceback
)) 
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL.

    Constructed with expected=True, so it is reported as a normal error
    message rather than as a youtube-dl bug.
    """
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match any of the expected content."""
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Preserve the originating exception info for later reporting.
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
 744 class PostProcessingError(Exception): 
 745     """Post Processing exception. 
 747     This exception may be raised by PostProcessor's .run() method to 
 748     indicate an error in the postprocessing task. 
 751     def __init__(self
, msg
): 
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached.

    Used as a signal to stop processing further downloads once the
    user-specified limit is hit.
    """
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Amounts appear to be byte counts (downloaded vs. announced size)
        # — inferred from the class docstring; confirm at raise sites.
        self.downloaded = downloaded
        self.expected = expected
 783 class XAttrMetadataError(Exception): 
 784     def __init__(self
, code
=None, msg
='Unknown error'): 
 785         super(XAttrMetadataError
, self
).__init
__(msg
) 
 789         # Parsing code and msg 
 790         if (self
.code 
in (errno
.ENOSPC
, errno
.EDQUOT
) or 
 791                 'No space left' in self
.msg 
or 'Disk quota excedded' in self
.msg
): 
 792             self
.reason 
= 'NO_SPACE' 
 793         elif self
.code 
== errno
.E2BIG 
or 'Argument list too long' in self
.msg
: 
 794             self
.reason 
= 'VALUE_TOO_LONG' 
 796             self
.reason 
= 'NOT_SUPPORTED' 
class XAttrUnavailableError(Exception):
    # NOTE(review): semantics inferred from the name — presumably raised
    # when no working extended-attribute (xattr) backend is available;
    # confirm against the raise sites.
    """Raised when extended-attribute support is unavailable."""
 803 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 804     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 805     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 806     # https://github.com/rg3/youtube-dl/issues/6727) 
 807     if sys
.version_info 
< (3, 0): 
 808         kwargs
[b
'strict'] = True 
 809     hc 
= http_class(*args
, **kwargs
) 
 810     source_address 
= ydl_handler
._params
.get('source_address') 
 811     if source_address 
is not None: 
 812         sa 
= (source_address
, 0) 
 813         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 814             hc
.source_address 
= sa
 
 816             def _hc_connect(self
, *args
, **kwargs
): 
 817                 sock 
= compat_socket_create_connection( 
 818                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 820                     self
.sock 
= ssl
.wrap_socket( 
 821                         sock
, self
.key_file
, self
.cert_file
, 
 822                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 825             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 830 def handle_youtubedl_headers(headers
): 
 831     filtered_headers 
= headers
 
 833     if 'Youtubedl-no-compression' in filtered_headers
: 
 834         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 835         del filtered_headers
['Youtubedl-no-compression'] 
 837     return filtered_headers
 
 840 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 841     """Handler for HTTP requests and responses. 
 843     This class, when installed with an OpenerDirector, automatically adds 
 844     the standard headers to every HTTP request and handles gzipped and 
 845     deflated responses from web servers. If compression is to be avoided in 
 846     a particular request, the original request in the program code only has 
 847     to include the HTTP header "Youtubedl-no-compression", which will be 
 848     removed before making the real request. 
 850     Part of this code was copied from: 
 852     http://techknack.net/python-urllib2-handlers/ 
 854     Andrew Rowls, the author of that code, agreed to release it to the 
 858     def __init__(self
, params
, *args
, **kwargs
): 
 859         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 860         self
._params 
= params
 
 862     def http_open(self
, req
): 
 863         conn_class 
= compat_http_client
.HTTPConnection
 
 865         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 867             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 868             del req
.headers
['Ytdl-socks-proxy'] 
 870         return self
.do_open(functools
.partial( 
 871             _create_http_connection
, self
, conn_class
, False), 
 877             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 879             return zlib
.decompress(data
) 
 882     def addinfourl_wrapper(stream
, headers
, url
, code
): 
 883         if hasattr(compat_urllib_request
.addinfourl
, 'getcode'): 
 884             return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
) 
 885         ret 
= compat_urllib_request
.addinfourl(stream
, headers
, url
) 
 889     def http_request(self
, req
): 
 890         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 891         # always respected by websites, some tend to give out URLs with non percent-encoded 
 892         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 893         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 894         # To work around aforementioned issue we will replace request's original URL with 
 895         # percent-encoded one 
 896         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 897         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 898         url 
= req
.get_full_url() 
 899         url_escaped 
= escape_url(url
) 
 901         # Substitute URL if any change after escaping 
 902         if url 
!= url_escaped
: 
 903             req 
= update_Request(req
, url
=url_escaped
) 
 905         for h
, v 
in std_headers
.items(): 
 906             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 907             # The dict keys are capitalized because of this bug by urllib 
 908             if h
.capitalize() not in req
.headers
: 
 911         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 913         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 914             # Python 2.6 is brain-dead when it comes to fragments 
 915             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 916             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 920     def http_response(self
, req
, resp
): 
 923         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 924             content 
= resp
.read() 
 925             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 927                 uncompressed 
= io
.BytesIO(gz
.read()) 
 928             except IOError as original_ioerror
: 
 929                 # There may be junk add the end of the file 
 930                 # See http://stackoverflow.com/q/4928560/35070 for details 
 931                 for i 
in range(1, 1024): 
 933                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 934                         uncompressed 
= io
.BytesIO(gz
.read()) 
 939                     raise original_ioerror
 
 940             resp 
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 941             resp
.msg 
= old_resp
.msg
 
 942             del resp
.headers
['Content-encoding'] 
 944         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 945             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
 946             resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 947             resp
.msg 
= old_resp
.msg
 
 948             del resp
.headers
['Content-encoding'] 
 949         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
 950         # https://github.com/rg3/youtube-dl/issues/6457). 
 951         if 300 <= resp
.code 
< 400: 
 952             location 
= resp
.headers
.get('Location') 
 954                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
 955                 if sys
.version_info 
>= (3, 0): 
 956                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
 958                     location 
= location
.decode('utf-8') 
 959                 location_escaped 
= escape_url(location
) 
 960                 if location 
!= location_escaped
: 
 961                     del resp
.headers
['Location'] 
 962                     if sys
.version_info 
< (3, 0): 
 963                         location_escaped 
= location_escaped
.encode('utf-8') 
 964                     resp
.headers
['Location'] = location_escaped
 
 967     https_request 
= http_request
 
 968     https_response 
= http_response
 
 971 def make_socks_conn_class(base_class
, socks_proxy
): 
 972     assert issubclass(base_class
, ( 
 973         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
 975     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
 976     if url_components
.scheme
.lower() == 'socks5': 
 977         socks_type 
= ProxyType
.SOCKS5
 
 978     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
 979         socks_type 
= ProxyType
.SOCKS4
 
 980     elif url_components
.scheme
.lower() == 'socks4a': 
 981         socks_type 
= ProxyType
.SOCKS4A
 
 983     def unquote_if_non_empty(s
): 
 986         return compat_urllib_parse_unquote_plus(s
) 
 990         url_components
.hostname
, url_components
.port 
or 1080, 
 992         unquote_if_non_empty(url_components
.username
), 
 993         unquote_if_non_empty(url_components
.password
), 
 996     class SocksConnection(base_class
): 
 998             self
.sock 
= sockssocket() 
 999             self
.sock
.setproxy(*proxy_args
) 
1000             if type(self
.timeout
) in (int, float): 
1001                 self
.sock
.settimeout(self
.timeout
) 
1002             self
.sock
.connect((self
.host
, self
.port
)) 
1004             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
1005                 if hasattr(self
, '_context'):  # Python > 2.6 
1006                     self
.sock 
= self
._context
.wrap_socket( 
1007                         self
.sock
, server_hostname
=self
.host
) 
1009                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
1011     return SocksConnection
 
1014 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
1015     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
1016         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
1017         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
1018         self
._params 
= params
 
1020     def https_open(self
, req
): 
1022         conn_class 
= self
._https
_conn
_class
 
1024         if hasattr(self
, '_context'):  # python > 2.6 
1025             kwargs
['context'] = self
._context
 
1026         if hasattr(self
, '_check_hostname'):  # python 3.x 
1027             kwargs
['check_hostname'] = self
._check
_hostname
 
1029         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
1031             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1032             del req
.headers
['Ytdl-socks-proxy'] 
1034         return self
.do_open(functools
.partial( 
1035             _create_http_connection
, self
, conn_class
, True), 
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that also applies cookie handling to HTTPS
    requests/responses by aliasing the HTTP handler methods."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Route HTTPS traffic through the same cookie-aware HTTP methods.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns a (timedelta, remaining_string) pair; the timedelta is zero
    when no offset (or a bare 'Z') is present.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str

    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # Bare 'Z' designator: UTC, i.e. zero offset.
        return datetime.timedelta(), date_str

    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are not representable by strptime here; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, fmt) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass
def date_formats(day_first=True):
    """Return the list of strptime formats to try, preferring
    day-first or month-first interpretation per *day_first*."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Commas confuse the format strings; normalize them to spaces.
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date string into a UNIX timestamp, or None."""
    if date_str is None:
        return None

    date_str = date_str.replace(',', ' ')

    # Remember a PM marker before stripping it; it shifts the clock by 12h.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    for fmt in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, fmt) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media extension from a URL, falling back to *default_ext*."""
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name: <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return base + '.' + sub_lang + '.' + sub_format
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError for strings matching neither form.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Fix: the pattern must be a raw string -- '\d' in a plain string is an
    # invalid escape sequence (DeprecationWarning since Python 3.6).
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation? timedelta has no month/year units, so
        # approximate them with 30/365 days.
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        # Not in the expected shape; pass it through untouched.
        return date_str
    return '-'.join(m.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            # Unbounded start: the earliest representable date.
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            # Unbounded end: the latest representable date.
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    WIN_OUTPUT_IDS = {
        1: -11,  # STD_OUTPUT_HANDLE
        2: -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b'GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b'GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # WriteConsoleW counts UTF-16 code units; write BMP runs in chunks
        # and non-BMP characters one surrogate pair (2 units) at a time.
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            assert written.value <= count
            s = s[written.value:]
    return True
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (default: stderr), handling Windows
    consoles and byte-mode streams."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream: write encoded bytes through its underlying buffer.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
def bytes_to_intlist(bs):
    """Convert a bytes/str object to a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Inverse of bytes_to_intlist: pack a list of byte values into bytes."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
1364 # Cross-platform file locking 
1365 if sys
.platform 
== 'win32': 
1366     import ctypes
.wintypes
 
1369     class OVERLAPPED(ctypes
.Structure
): 
1371             ('Internal', ctypes
.wintypes
.LPVOID
), 
1372             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1373             ('Offset', ctypes
.wintypes
.DWORD
), 
1374             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1375             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1378     kernel32 
= ctypes
.windll
.kernel32
 
1379     LockFileEx 
= kernel32
.LockFileEx
 
1380     LockFileEx
.argtypes 
= [ 
1381         ctypes
.wintypes
.HANDLE
,     # hFile 
1382         ctypes
.wintypes
.DWORD
,      # dwFlags 
1383         ctypes
.wintypes
.DWORD
,      # dwReserved 
1384         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1385         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1386         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1388     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1389     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1390     UnlockFileEx
.argtypes 
= [ 
1391         ctypes
.wintypes
.HANDLE
,     # hFile 
1392         ctypes
.wintypes
.DWORD
,      # dwReserved 
1393         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1394         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1395         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1397     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1398     whole_low 
= 0xffffffff 
1399     whole_high 
= 0x7fffffff 
1401     def _lock_file(f
, exclusive
): 
1402         overlapped 
= OVERLAPPED() 
1403         overlapped
.Offset 
= 0 
1404         overlapped
.OffsetHigh 
= 0 
1405         overlapped
.hEvent 
= 0 
1406         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1407         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1408         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1409                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1410             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1412     def _unlock_file(f
): 
1413         assert f
._lock
_file
_overlapped
_p
 
1414         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1415         if not UnlockFileEx(handle
, 0, 
1416                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1417             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1420     # Some platforms, such as Jython, is missing fcntl 
1424         def _lock_file(f
, exclusive
): 
1425             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1427         def _unlock_file(f
): 
1428             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1430         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1432         def _lock_file(f
, exclusive
): 
1433             raise IOError(UNSUPPORTED_MSG
) 
1435         def _unlock_file(f
): 
1436             raise IOError(UNSUPPORTED_MSG
) 
class locked_file(object):
    """Context manager wrapping io.open with an advisory whole-file lock;
    the lock is exclusive unless the file is opened read-only."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to utf-8 when unknown."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
def shell_quote(args):
    """Quote each argument for safe display as a shell command line."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL.
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url: return (clean_url, data_or_default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Render a byte count as a human-readable string like '1.00KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' using *unit_table* ({unit: multiplier});
    return the scaled integer, or None when nothing matches."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    # Accept a comma as the decimal separator too.
    num_str = m.group('num').replace(',', '.')
    mult = unit_table[m.group('unit')]
    return int(float(num_str) * mult)
def parse_filesize(s):
    """Parse a human-readable file size ('5.4MiB', '500 kB', ...) to bytes."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
def parse_count(s):
    """Parse a human-readable count like '1.2M' or '12,345' into an int."""
    if s is None:
        return None

    s = s.strip()

    if re.match(r'^[\d,.]+$', s):
        # Plain number with optional thousand separators.
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviation (e.g. 'Jan' -> 1), or None when unknown. """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, leaving entities intact."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
def setproctitle(title):
    """Best-effort rename of the current process (Linux prctl only)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip the prefix *start* from *s* when present; None passes through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip the suffix *end* from *s* when present; None passes through."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def url_basename(url):
    """Return the last path component of *url* (no query/fragment)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]
def base_url(url):
    """Return the URL up to and including the last slash before any
    query string or fragment."""
    return re.match(r'https?://[^?#&]+/', url).group()
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always issues a HEAD request."""

    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    """Request subclass that always issues a PUT request."""

    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (scaled by invscale/scale), returning *default*
    for None, empty strings and unparsable values."""
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Stringify *v*, returning *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Drop thousand separators and stray '+' signs before converting.
    int_str = re.sub(r'[,\.\+]', '', int_str)
    return int(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float (scaled by invscale/scale), returning *default*
    for None or unparsable values."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def strip_or_none(v):
    """Strip surrounding whitespace, passing None through unchanged."""
    if v is None:
        return None
    return v.strip()
def parse_duration(s):
    """Parse a duration string ('1:23:45', '3 min 4 sec', '2.5 hours', ...)
    into seconds (float), or None when unparsable."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated form first: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-labelled form: '1d 2h 3min 4.5s' (also ISO-8601-ish 'PT...').
        m = re.match(
            r'''(?ix)(?:P?T)?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Fractional hours or minutes: '2.5 hours', '10 min'.
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the file's real extension; when the real
    extension differs from *expected_real_ext*, append instead."""
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file's extension with *ext*; when the real extension
    differs from *expected_real_ext*, append instead of replacing."""
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        base = name
    else:
        base = filename
    return '{0}.{1}'.format(base, ext)
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # Fix: the default was a mutable list (args=[]); use a None sentinel
    # instead. Behavior for existing callers is unchanged.
    if args is None:
        args = []
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from *output* using *version_re*;
    return *unrecognized* when no match is found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    return unrecognized
class PagedList(object):
    """Abstract base for lazily paged result lists; subclasses
    implement getslice()."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via *pagefunc*,
    optionally caching fetched pages."""

    def __init__(self, pagefunc, pagesize, use_cache=False):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Elements of the first page preceding *start*.
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escape sequences found in *s*."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escape sequences found in *s*."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # Hostnames are IDNA-encoded rather than percent-escaped.
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping comments and blanks;
    closes the file when done."""

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that editors sometimes prepend.
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
    """Merge *query* (a dict) into the URL's existing query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Return a fresh Request cloned from *req* with the given URL, data,
    headers and query-string overrides applied."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    # Preserve the HTTP verb by picking the matching Request subclass.
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Fetch a value from *d* by a key or a sequence of candidate keys,
    optionally skipping falsy values, returning *default* when none fit."""
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key not in d or d[key] is None or skip_false_values and not d[key]:
                continue
            return d[key]
        return default
    return d.get(key_or_keys, default)
def try_get(src, getter, expected_type=None):
    """Apply *getter* to *src*, swallowing common access errors; return
    the value only when it matches *expected_type* (if given)."""
    try:
        v = getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        pass
    else:
        if expected_type is None or isinstance(v, expected_type):
            return v
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding bytes with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
# US TV Parental Guidelines rating -> minimum viewer age.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
def parse_age_limit(s):
    """Normalize an age limit (int, '18+', US rating or TV guideline)
    into an integer age, or None."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    return TV_PARENTAL_GUIDELINES.get(s)
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving the bare JSON payload."""
    return re.sub(
        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code):
    """Convert a JavaScript object literal into valid JSON text."""

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v == ',':
            # Comments and trailing commas are simply dropped.
            return ""

        if v[0] in ("'", '"'):
            # Re-escape string contents for JSON.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        INTEGER_TABLE = (
            (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
            (r'^(0+[0-7]+)\s*:?$', 8),
        )

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers become quoted strings.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        /\*.*?\*/|,(?=\s*[\]}])|
        [a-zA-Z_][.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
        [0-9]+(?=\s*:)
        ''', fix_kv, code)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown quality identifiers sort below everything.
            return -1
    return q
# Default output filename template.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(int(e) for e in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; on missing/unparsable input fall back
    to *assume_new*."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updatable when running from a zip bundle or a frozen executable.
    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
2170 def error_to_compat_str(err
): 
2172     # On python 2 error byte string must be decoded with proper 
2173     # encoding rather than ascii 
2174     if sys
.version_info
[0] < 3: 
2175         err_str 
= err_str
.decode(preferredencoding()) 
2179 def mimetype2ext(mt
): 
2185         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as 
2186         # it's the most popular one 
2187         'audio/mpeg': 'mp3', 
2192     _
, _
, res 
= mt
.rpartition('/') 
2193     res 
= res
.split(';')[0].strip().lower() 
2197         'smptett+xml': 'tt', 
2203         'x-mp4-fragmented': 'mp4', 
2206         'x-mpegurl': 'm3u8', 
2207         'vnd.apple.mpegurl': 'm3u8', 
2212         'vnd.ms-sstr+xml': 'ism', 
2217 def parse_codecs(codecs_str
): 
2218     # http://tools.ietf.org/html/rfc6381 
2221     splited_codecs 
= list(filter(None, map( 
2222         lambda str: str.strip(), codecs_str
.strip().strip(',').split(',')))) 
2223     vcodec
, acodec 
= None, None 
2224     for full_codec 
in splited_codecs
: 
2225         codec 
= full_codec
.split('.')[0] 
2226         if codec 
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): 
2229         elif codec 
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'): 
2233             write_string('WARNING: Unknown codec %s' % full_codec
, sys
.stderr
) 
2234     if not vcodec 
and not acodec
: 
2235         if len(splited_codecs
) == 2: 
2240         elif len(splited_codecs
) == 1: 
2247             'vcodec': vcodec 
or 'none', 
2248             'acodec': acodec 
or 'none', 
2253 def urlhandle_detect_ext(url_handle
): 
2254     getheader 
= url_handle
.headers
.get
 
2256     cd 
= getheader('Content-Disposition') 
2258         m 
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
) 
2260             e 
= determine_ext(m
.group('filename'), default_ext
=None) 
2264     return mimetype2ext(getheader('Content-Type')) 
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI embedding *data* (bytes) as base64."""
    b64 = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, b64)
2271 def age_restricted(content_limit
, age_limit
): 
2272     """ Returns True iff the content should be blocked """ 
2274     if age_limit 
is None:  # No limit set 
2276     if content_limit 
is None: 
2277         return False  # Content available for everyone 
2278     return age_limit 
< content_limit
 
2281 def is_html(first_bytes
): 
2282     """ Detect whether a file contains HTML by examining its first bytes. """ 
2285         (b
'\xef\xbb\xbf', 'utf-8'), 
2286         (b
'\x00\x00\xfe\xff', 'utf-32-be'), 
2287         (b
'\xff\xfe\x00\x00', 'utf-32-le'), 
2288         (b
'\xff\xfe', 'utf-16-le'), 
2289         (b
'\xfe\xff', 'utf-16-be'), 
2291     for bom
, enc 
in BOMS
: 
2292         if first_bytes
.startswith(bom
): 
2293             s 
= first_bytes
[len(bom
):].decode(enc
, 'replace') 
2296         s 
= first_bytes
.decode('utf-8', 'replace') 
2298     return re
.match(r
'^\s*<', s
) 
2301 def determine_protocol(info_dict
): 
2302     protocol 
= info_dict
.get('protocol') 
2303     if protocol 
is not None: 
2306     url 
= info_dict
['url'] 
2307     if url
.startswith('rtmp'): 
2309     elif url
.startswith('mms'): 
2311     elif url
.startswith('rtsp'): 
2314     ext 
= determine_ext(url
) 
2320     return compat_urllib_parse_urlparse(url
).scheme
 
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # The widest cell in each column determines that column's width.
    max_lens = []
    for col in zip(*table):
        max_lens.append(max(len(compat_str(v)) for v in col))
    # Left-align every column but the last, padding each by one extra space.
    pieces = ['%-' + compat_str(width + 1) + 's' for width in max_lens[:-1]]
    format_str = ' '.join(pieces) + '%s'
    rows = [format_str % tuple(row) for row in table]
    return '\n'.join(rows)
2331 def _match_one(filter_part
, dct
): 
2332     COMPARISON_OPERATORS 
= { 
2340     operator_rex 
= re
.compile(r
'''(?x)\s* 
2342         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* 
2344             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| 
2345             (?P<strval>(?![0-9.])[a-z0-9A-Z]*) 
2348         ''' % '|'.join(map(re
.escape
, COMPARISON_OPERATORS
.keys()))) 
2349     m 
= operator_rex
.search(filter_part
) 
2351         op 
= COMPARISON_OPERATORS
[m
.group('op')] 
2352         actual_value 
= dct
.get(m
.group('key')) 
2353         if (m
.group('strval') is not None or 
2354             # If the original field is a string and matching comparisonvalue is 
2355             # a number we should respect the origin of the original field 
2356             # and process comparison value as a string (see 
2357             # https://github.com/rg3/youtube-dl/issues/11082). 
2358             actual_value 
is not None and m
.group('intval') is not None and 
2359                 isinstance(actual_value
, compat_str
)): 
2360             if m
.group('op') not in ('=', '!='): 
2362                     'Operator %s does not support string values!' % m
.group('op')) 
2363             comparison_value 
= m
.group('strval') or m
.group('intval') 
2366                 comparison_value 
= int(m
.group('intval')) 
2368                 comparison_value 
= parse_filesize(m
.group('intval')) 
2369                 if comparison_value 
is None: 
2370                     comparison_value 
= parse_filesize(m
.group('intval') + 'B') 
2371                 if comparison_value 
is None: 
2373                         'Invalid integer value %r in filter part %r' % ( 
2374                             m
.group('intval'), filter_part
)) 
2375         if actual_value 
is None: 
2376             return m
.group('none_inclusive') 
2377         return op(actual_value
, comparison_value
) 
2380         '': lambda v
: v 
is not None, 
2381         '!': lambda v
: v 
is None, 
2383     operator_rex 
= re
.compile(r
'''(?x)\s* 
2384         (?P<op>%s)\s*(?P<key>[a-z_]+) 
2386         ''' % '|'.join(map(re
.escape
, UNARY_OPERATORS
.keys()))) 
2387     m 
= operator_rex
.search(filter_part
) 
2389         op 
= UNARY_OPERATORS
[m
.group('op')] 
2390         actual_value 
= dct
.get(m
.group('key')) 
2391         return op(actual_value
) 
2393     raise ValueError('Invalid filter part %r' % filter_part
) 
2396 def match_str(filter_str
, dct
): 
2397     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ 
2400         _match_one(filter_part
, dct
) for filter_part 
in filter_str
.split('&')) 
2403 def match_filter_func(filter_str
): 
2404     def _match_func(info_dict
): 
2405         if match_str(filter_str
, info_dict
): 
2408             video_title 
= info_dict
.get('title', info_dict
.get('id', 'video')) 
2409             return '%s does not pass filter %s, skipping ..' % (video_title
, filter_str
) 
2413 def parse_dfxp_time_expr(time_expr
): 
2417     mobj 
= re
.match(r
'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr
) 
2419         return float(mobj
.group('time_offset')) 
2421     mobj 
= re
.match(r
'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr
) 
2423         return 3600 * int(mobj
.group(1)) + 60 * int(mobj
.group(2)) + float(mobj
.group(3).replace(':', '.')) 
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as a SubRip (SRT) timecode HH:MM:SS,mmm."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates the float components, matching integer division.
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
2430 def dfxp2srt(dfxp_data
): 
2431     _x 
= functools
.partial(xpath_with_ns
, ns_map
={ 
2432         'ttml': 'http://www.w3.org/ns/ttml', 
2433         'ttaf1': 'http://www.w3.org/2006/10/ttaf1', 
2434         'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1', 
2437     class TTMLPElementParser(object): 
2440         def start(self
, tag
, attrib
): 
2441             if tag 
in (_x('ttml:br'), _x('ttaf1:br'), 'br'): 
2447         def data(self
, data
): 
2451             return self
.out
.strip() 
2453     def parse_node(node
): 
2454         target 
= TTMLPElementParser() 
2455         parser 
= xml
.etree
.ElementTree
.XMLParser(target
=target
) 
2456         parser
.feed(xml
.etree
.ElementTree
.tostring(node
)) 
2457         return parser
.close() 
2459     dfxp 
= compat_etree_fromstring(dfxp_data
.encode('utf-8')) 
2461     paras 
= dfxp
.findall(_x('.//ttml:p')) or dfxp
.findall(_x('.//ttaf1:p')) or dfxp
.findall(_x('.//ttaf1_0604:p')) or dfxp
.findall('.//p') 
2464         raise ValueError('Invalid dfxp/TTML subtitle') 
2466     for para
, index 
in zip(paras
, itertools
.count(1)): 
2467         begin_time 
= parse_dfxp_time_expr(para
.attrib
.get('begin')) 
2468         end_time 
= parse_dfxp_time_expr(para
.attrib
.get('end')) 
2469         dur 
= parse_dfxp_time_expr(para
.attrib
.get('dur')) 
2470         if begin_time 
is None: 
2475             end_time 
= begin_time 
+ dur
 
2476         out
.append('%d\n%s --> %s\n%s\n\n' % ( 
2478             srt_subtitles_timecode(begin_time
), 
2479             srt_subtitles_timecode(end_time
), 
2485 def cli_option(params
, command_option
, param
): 
2486     param 
= params
.get(param
) 
2488         param 
= compat_str(param
) 
2489     return [command_option
, param
] if param 
is not None else [] 
2492 def cli_bool_option(params
, command_option
, param
, true_value
='true', false_value
='false', separator
=None): 
2493     param 
= params
.get(param
) 
2494     assert isinstance(param
, bool) 
2496         return [command_option 
+ separator 
+ (true_value 
if param 
else false_value
)] 
2497     return [command_option
, true_value 
if param 
else false_value
] 
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when params[param] equals expected_value, else []."""
    value = params.get(param)
    if value == expected_value:
        return [command_option]
    return []
2505 def cli_configuration_args(params
, param
, default
=[]): 
2506     ex_args 
= params
.get(param
) 
2509     assert isinstance(ex_args
, list) 
2513 class ISO639Utils(object): 
2514     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
2703     def short2long(cls
, code
): 
2704         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
2705         return cls
._lang
_map
.get(code
[:2]) 
2708     def long2short(cls
, code
): 
2709         """Convert language code from ISO 639-2/T to ISO 639-1""" 
2710         for short_name
, long_name 
in cls
._lang
_map
.items(): 
2711             if long_name 
== code
: 
2715 class ISO3166Utils(object): 
2716     # From http://data.okfn.org/data/core/country-list 
2718         'AF': 'Afghanistan', 
2719         'AX': 'Åland Islands', 
2722         'AS': 'American Samoa', 
2727         'AG': 'Antigua and Barbuda', 
2744         'BO': 'Bolivia, Plurinational State of', 
2745         'BQ': 'Bonaire, Sint Eustatius and Saba', 
2746         'BA': 'Bosnia and Herzegovina', 
2748         'BV': 'Bouvet Island', 
2750         'IO': 'British Indian Ocean Territory', 
2751         'BN': 'Brunei Darussalam', 
2753         'BF': 'Burkina Faso', 
2759         'KY': 'Cayman Islands', 
2760         'CF': 'Central African Republic', 
2764         'CX': 'Christmas Island', 
2765         'CC': 'Cocos (Keeling) Islands', 
2769         'CD': 'Congo, the Democratic Republic of the', 
2770         'CK': 'Cook Islands', 
2772         'CI': 'Côte d\'Ivoire', 
2777         'CZ': 'Czech Republic', 
2781         'DO': 'Dominican Republic', 
2784         'SV': 'El Salvador', 
2785         'GQ': 'Equatorial Guinea', 
2789         'FK': 'Falkland Islands (Malvinas)', 
2790         'FO': 'Faroe Islands', 
2794         'GF': 'French Guiana', 
2795         'PF': 'French Polynesia', 
2796         'TF': 'French Southern Territories', 
2811         'GW': 'Guinea-Bissau', 
2814         'HM': 'Heard Island and McDonald Islands', 
2815         'VA': 'Holy See (Vatican City State)', 
2822         'IR': 'Iran, Islamic Republic of', 
2825         'IM': 'Isle of Man', 
2835         'KP': 'Korea, Democratic People\'s Republic of', 
2836         'KR': 'Korea, Republic of', 
2839         'LA': 'Lao People\'s Democratic Republic', 
2845         'LI': 'Liechtenstein', 
2849         'MK': 'Macedonia, the Former Yugoslav Republic of', 
2856         'MH': 'Marshall Islands', 
2862         'FM': 'Micronesia, Federated States of', 
2863         'MD': 'Moldova, Republic of', 
2874         'NL': 'Netherlands', 
2875         'NC': 'New Caledonia', 
2876         'NZ': 'New Zealand', 
2881         'NF': 'Norfolk Island', 
2882         'MP': 'Northern Mariana Islands', 
2887         'PS': 'Palestine, State of', 
2889         'PG': 'Papua New Guinea', 
2892         'PH': 'Philippines', 
2896         'PR': 'Puerto Rico', 
2900         'RU': 'Russian Federation', 
2902         'BL': 'Saint Barthélemy', 
2903         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
2904         'KN': 'Saint Kitts and Nevis', 
2905         'LC': 'Saint Lucia', 
2906         'MF': 'Saint Martin (French part)', 
2907         'PM': 'Saint Pierre and Miquelon', 
2908         'VC': 'Saint Vincent and the Grenadines', 
2911         'ST': 'Sao Tome and Principe', 
2912         'SA': 'Saudi Arabia', 
2916         'SL': 'Sierra Leone', 
2918         'SX': 'Sint Maarten (Dutch part)', 
2921         'SB': 'Solomon Islands', 
2923         'ZA': 'South Africa', 
2924         'GS': 'South Georgia and the South Sandwich Islands', 
2925         'SS': 'South Sudan', 
2930         'SJ': 'Svalbard and Jan Mayen', 
2933         'CH': 'Switzerland', 
2934         'SY': 'Syrian Arab Republic', 
2935         'TW': 'Taiwan, Province of China', 
2937         'TZ': 'Tanzania, United Republic of', 
2939         'TL': 'Timor-Leste', 
2943         'TT': 'Trinidad and Tobago', 
2946         'TM': 'Turkmenistan', 
2947         'TC': 'Turks and Caicos Islands', 
2951         'AE': 'United Arab Emirates', 
2952         'GB': 'United Kingdom', 
2953         'US': 'United States', 
2954         'UM': 'United States Minor Outlying Islands', 
2958         'VE': 'Venezuela, Bolivarian Republic of', 
2960         'VG': 'Virgin Islands, British', 
2961         'VI': 'Virgin Islands, U.S.', 
2962         'WF': 'Wallis and Futuna', 
2963         'EH': 'Western Sahara', 
2970     def short2full(cls
, code
): 
2971         """Convert an ISO 3166-2 country code to the corresponding full name""" 
2972         return cls
._country
_map
.get(code
.upper()) 
2975 class PerRequestProxyHandler(compat_urllib_request
.ProxyHandler
): 
2976     def __init__(self
, proxies
=None): 
2977         # Set default handlers 
2978         for type in ('http', 'https'): 
2979             setattr(self
, '%s_open' % type, 
2980                     lambda r
, proxy
='__noproxy__', type=type, meth
=self
.proxy_open
: 
2981                         meth(r
, proxy
, type)) 
2982         return compat_urllib_request
.ProxyHandler
.__init
__(self
, proxies
) 
2984     def proxy_open(self
, req
, proxy
, type): 
2985         req_proxy 
= req
.headers
.get('Ytdl-request-proxy') 
2986         if req_proxy 
is not None: 
2988             del req
.headers
['Ytdl-request-proxy'] 
2990         if proxy 
== '__noproxy__': 
2991             return None  # No Proxy 
2992         if compat_urlparse
.urlparse(proxy
).scheme
.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): 
2993             req
.add_header('Ytdl-socks-proxy', proxy
) 
2994             # youtube-dl's http/https handlers do wrapping the socket with socks 
2996         return compat_urllib_request
.ProxyHandler
.proxy_open( 
2997             self
, req
, proxy
, type) 
3000 def ohdave_rsa_encrypt(data
, exponent
, modulus
): 
3002     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/ 
3005         data: data to encrypt, bytes-like object 
3006         exponent, modulus: parameter e and N of RSA algorithm, both integer 
3007     Output: hex string of encrypted data 
3009     Limitation: supports one block encryption only 
3012     payload 
= int(binascii
.hexlify(data
[::-1]), 16) 
3013     encrypted 
= pow(payload
, exponent
, modulus
) 
3014     return '%x' % encrypted
 
3017 def encode_base_n(num
, n
, table
=None): 
3018     FULL_TABLE 
= '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 
3020         table 
= FULL_TABLE
[:n
] 
3023         raise ValueError('base %d exceeds table length %d' % (n
, len(table
))) 
3030         ret 
= table
[num 
% n
] + ret
 
3035 def decode_packed_codes(code
): 
3036     mobj 
= re
.search(PACKED_CODES_RE
, code
) 
3037     obfucasted_code
, base
, count
, symbols 
= mobj
.groups() 
3040     symbols 
= symbols
.split('|') 
3045         base_n_count 
= encode_base_n(count
, base
) 
3046         symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
 
3049         r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)], 
3053 def parse_m3u8_attributes(attrib
): 
3055     for (key
, val
) in re
.findall(r
'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib
): 
3056         if val
.startswith('"'): 
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator)."""
    if val < 0:
        # Reinterpret the negative value as its unsigned 32-bit
        # two's-complement representation before shifting.
        val += 0x100000000
    return val >> n
3066 # Based on png2str() written by @gdkchan and improved by @yokrysty 
3067 # Originally posted at https://github.com/rg3/youtube-dl/issues/9706 
3068 def decode_png(png_data
): 
3069     # Reference: https://www.w3.org/TR/PNG/ 
3070     header 
= png_data
[8:] 
3072     if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR': 
3073         raise IOError('Not a valid PNG file.') 
3075     int_map 
= {1: '>B', 2: '>H', 4: '>I'} 
3076     unpack_integer 
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0] 
3081         length 
= unpack_integer(header
[:4]) 
3084         chunk_type 
= header
[:4] 
3087         chunk_data 
= header
[:length
] 
3088         header 
= header
[length
:] 
3090         header 
= header
[4:]  # Skip CRC 
3098     ihdr 
= chunks
[0]['data'] 
3100     width 
= unpack_integer(ihdr
[:4]) 
3101     height 
= unpack_integer(ihdr
[4:8]) 
3105     for chunk 
in chunks
: 
3106         if chunk
['type'] == b
'IDAT': 
3107             idat 
+= chunk
['data'] 
3110         raise IOError('Unable to read PNG data.') 
3112     decompressed_data 
= bytearray(zlib
.decompress(idat
)) 
3117     def _get_pixel(idx
): 
3122     for y 
in range(height
): 
3123         basePos 
= y 
* (1 + stride
) 
3124         filter_type 
= decompressed_data
[basePos
] 
3128         pixels
.append(current_row
) 
3130         for x 
in range(stride
): 
3131             color 
= decompressed_data
[1 + basePos 
+ x
] 
3132             basex 
= y 
* stride 
+ x
 
3137                 left 
= _get_pixel(basex 
- 3) 
3139                 up 
= _get_pixel(basex 
- stride
) 
3141             if filter_type 
== 1:  # Sub 
3142                 color 
= (color 
+ left
) & 0xff 
3143             elif filter_type 
== 2:  # Up 
3144                 color 
= (color 
+ up
) & 0xff 
3145             elif filter_type 
== 3:  # Average 
3146                 color 
= (color 
+ ((left 
+ up
) >> 1)) & 0xff 
3147             elif filter_type 
== 4:  # Paeth 
3153                     c 
= _get_pixel(basex 
- stride 
- 3) 
3161                 if pa 
<= pb 
and pa 
<= pc
: 
3162                     color 
= (color 
+ a
) & 0xff 
3164                     color 
= (color 
+ b
) & 0xff 
3166                     color 
= (color 
+ c
) & 0xff 
3168             current_row
.append(color
) 
3170     return width
, height
, pixels
 
3173 def write_xattr(path
, key
, value
): 
3174     # This mess below finds the best xattr tool for the job 
3176         # try the pyxattr module... 
3179         if hasattr(xattr
, 'set'):  # pyxattr 
3180             # Unicode arguments are not supported in python-pyxattr until 
3182             # See https://github.com/rg3/youtube-dl/issues/5498 
3183             pyxattr_required_version 
= '0.5.0' 
3184             if version_tuple(xattr
.__version
__) < version_tuple(pyxattr_required_version
): 
3185                 # TODO: fallback to CLI tools 
3186                 raise XAttrUnavailableError( 
3187                     'python-pyxattr is detected but is too old. ' 
3188                     'youtube-dl requires %s or above while your version is %s. ' 
3189                     'Falling back to other xattr implementations' % ( 
3190                         pyxattr_required_version
, xattr
.__version
__)) 
3192             setxattr 
= xattr
.set 
3194             setxattr 
= xattr
.setxattr
 
3197             setxattr(path
, key
, value
) 
3198         except EnvironmentError as e
: 
3199             raise XAttrMetadataError(e
.errno
, e
.strerror
) 
3202         if compat_os_name 
== 'nt': 
3203             # Write xattrs to NTFS Alternate Data Streams: 
3204             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 
3205             assert ':' not in key
 
3206             assert os
.path
.exists(path
) 
3208             ads_fn 
= path 
+ ':' + key
 
3210                 with open(ads_fn
, 'wb') as f
: 
3212             except EnvironmentError as e
: 
3213                 raise XAttrMetadataError(e
.errno
, e
.strerror
) 
3215             user_has_setfattr 
= check_executable('setfattr', ['--version']) 
3216             user_has_xattr 
= check_executable('xattr', ['-h']) 
3218             if user_has_setfattr 
or user_has_xattr
: 
3220                 value 
= value
.decode('utf-8') 
3221                 if user_has_setfattr
: 
3222                     executable 
= 'setfattr' 
3223                     opts 
= ['-n', key
, '-v', value
] 
3224                 elif user_has_xattr
: 
3225                     executable 
= 'xattr' 
3226                     opts 
= ['-w', key
, value
] 
3228                 cmd 
= ([encodeFilename(executable
, True)] + 
3229                        [encodeArgument(o
) for o 
in opts
] + 
3230                        [encodeFilename(path
, True)]) 
3233                     p 
= subprocess
.Popen( 
3234                         cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
) 
3235                 except EnvironmentError as e
: 
3236                     raise XAttrMetadataError(e
.errno
, e
.strerror
) 
3237                 stdout
, stderr 
= p
.communicate() 
3238                 stderr 
= stderr
.decode('utf-8', 'replace') 
3239                 if p
.returncode 
!= 0: 
3240                     raise XAttrMetadataError(p
.returncode
, stderr
) 
3243                 # On Unix, and can't find pyxattr, setfattr, or xattr. 
3244                 if sys
.platform
.startswith('linux'): 
3245                     raise XAttrUnavailableError( 
3246                         "Couldn't find a tool to set the xattrs. " 
3247                         "Install either the python 'pyxattr' or 'xattr' " 
3248                         "modules, or the GNU 'attr' package " 
3249                         "(which contains the 'setfattr' tool).") 
3251                     raise XAttrUnavailableError( 
3252                         "Couldn't find a tool to set the xattrs. " 
3253                         "Install either the python 'xattr' module, " 
3254                         "or the 'xattr' binary.")