2 # -*- coding: utf-8 -*- 
   4 from __future__ 
import unicode_literals
 
  33 import xml
.etree
.ElementTree
 
  40     compat_etree_fromstring
, 
  42     compat_html_entities_html5
, 
  47     compat_socket_create_connection
, 
  53     compat_urllib_parse_urlencode
, 
  54     compat_urllib_parse_urlparse
, 
  55     compat_urllib_parse_unquote_plus
, 
  56     compat_urllib_request
, 
  67 def register_socks_protocols(): 
  68     # "Register" SOCKS protocols 
  69     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 
  70     # URLs with protocols not in urlparse.uses_netloc are not handled correctly 
  71     for scheme 
in ('socks', 'socks4', 'socks4a', 'socks5'): 
  72         if scheme 
not in compat_urlparse
.uses_netloc
: 
  73             compat_urlparse
.uses_netloc
.append(scheme
) 
  76 # This is not clearly defined otherwise 
  77 compiled_regex_type 
= type(re
.compile('')) 
  80     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 
  81     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  82     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  83     'Accept-Encoding': 'gzip, deflate', 
  84     'Accept-Language': 'en-us,en;q=0.5', 
  90 ENGLISH_MONTH_NAMES 
= [ 
  91     'January', 'February', 'March', 'April', 'May', 'June', 
  92     'July', 'August', 'September', 'October', 'November', 'December'] 
  95     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 
  96     'flv', 'f4v', 'f4a', 'f4b', 
  97     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', 
 107     'f4f', 'f4m', 'm3u8', 'smil') 
 109 # needed for sanitizing filenames in restricted mode 
 110 ACCENT_CHARS 
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', 
 111                         itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], 
 112                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) 
 128     '%Y-%m-%d %H:%M:%S.%f', 
 131     '%Y-%m-%dT%H:%M:%SZ', 
 132     '%Y-%m-%dT%H:%M:%S.%fZ', 
 133     '%Y-%m-%dT%H:%M:%S.%f0Z', 
 135     '%Y-%m-%dT%H:%M:%S.%f', 
 139 DATE_FORMATS_DAY_FIRST 
= list(DATE_FORMATS
) 
 140 DATE_FORMATS_DAY_FIRST
.extend([ 
 149 DATE_FORMATS_MONTH_FIRST 
= list(DATE_FORMATS
) 
 150 DATE_FORMATS_MONTH_FIRST
.extend([ 
 159 def preferredencoding(): 
 160     """Get preferred encoding. 
 162     Returns the best encoding scheme for the system, based on 
 163     locale.getpreferredencoding() and some further tweaks. 
 166         pref 
= locale
.getpreferredencoding() 
 174 def write_json_file(obj
, fn
): 
 175     """ Encode obj as JSON and write it to fn, atomically if possible """ 
 177     fn 
= encodeFilename(fn
) 
 178     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
 179         encoding 
= get_filesystem_encoding() 
 180         # os.path.basename returns a bytes object, but NamedTemporaryFile 
 181         # will fail if the filename contains non ascii characters unless we 
 182         # use a unicode object 
 183         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
 184         # the same for os.path.dirname 
 185         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
 187         path_basename 
= os
.path
.basename
 
 188         path_dirname 
= os
.path
.dirname
 
 192         'prefix': path_basename(fn
) + '.', 
 193         'dir': path_dirname(fn
), 
 197     # In Python 2.x, json.dump expects a bytestream. 
 198     # In Python 3.x, it writes to a character stream 
 199     if sys
.version_info 
< (3, 0): 
 207     tf 
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
)) 
 212         if sys
.platform 
== 'win32': 
 213             # Need to remove existing file on Windows, else os.rename raises 
 214             # WindowsError or FileExistsError. 
 219         os
.rename(tf
.name
, fn
) 
 228 if sys
.version_info 
>= (2, 7): 
 229     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 230         """ Find the xpath xpath[@key=val] """ 
 231         assert re
.match(r
'^[a-zA-Z_-]+$', key
) 
 232         expr 
= xpath 
+ ('[@%s]' % key 
if val 
is None else "[@%s='%s']" % (key
, val
)) 
 233         return node
.find(expr
) 
 235     def find_xpath_attr(node
, xpath
, key
, val
=None): 
 236         for f 
in node
.findall(compat_xpath(xpath
)): 
 237             if key 
not in f
.attrib
: 
 239             if val 
is None or f
.attrib
.get(key
) == val
: 
 243 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 244 # the namespace parameter 
 247 def xpath_with_ns(path
, ns_map
): 
 248     components 
= [c
.split(':') for c 
in path
.split('/')] 
 252             replaced
.append(c
[0]) 
 255             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 256     return '/'.join(replaced
) 
 259 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 260     def _find_xpath(xpath
): 
 261         return node
.find(compat_xpath(xpath
)) 
 263     if isinstance(xpath
, (str, compat_str
)): 
 264         n 
= _find_xpath(xpath
) 
 272         if default 
is not NO_DEFAULT
: 
 275             name 
= xpath 
if name 
is None else name
 
 276             raise ExtractorError('Could not find XML element %s' % name
) 
 282 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 283     n 
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
) 
 284     if n 
is None or n 
== default
: 
 287         if default 
is not NO_DEFAULT
: 
 290             name 
= xpath 
if name 
is None else name
 
 291             raise ExtractorError('Could not find XML element\'s text %s' % name
) 
 297 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
): 
 298     n 
= find_xpath_attr(node
, xpath
, key
) 
 300         if default 
is not NO_DEFAULT
: 
 303             name 
= '%s[@%s]' % (xpath
, key
) if name 
is None else name
 
 304             raise ExtractorError('Could not find XML attribute %s' % name
) 
 310 def get_element_by_id(id, html
): 
 311     """Return the content of the tag with the specified ID in the passed HTML document""" 
 312     return get_element_by_attribute('id', id, html
) 
 315 def get_element_by_class(class_name
, html
): 
 316     return get_element_by_attribute( 
 317         'class', r
'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), 
 318         html, escape_value=False) 
 321 def get_element_by_attribute(attribute, value, html, escape_value=True): 
 322     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 324     value = re.escape(value) if escape_value else value 
 326     m = re.search(r'''(?xs) 
 328          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'))*?
 
 330          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'))*? 
 334     ''' % (re.escape(attribute), value), html) 
 338     res = m.group('content
') 
 340     if res.startswith('"') or res.startswith("'"): 
 343     return unescapeHTML(res) 
 346 class HTMLAttributeParser(compat_HTMLParser): 
 347     """Trivial HTML parser to gather the attributes for a single element""" 
 350         compat_HTMLParser.__init__(self) 
 352     def handle_starttag(self, tag, attrs): 
 353         self.attrs = dict(attrs) 
 356 def extract_attributes(html_element): 
 357     """Given a string for an HTML element such as 
 359          a="foo" B="bar" c="&98;az" d=boz 
 360          empty= noval entity="&" 
 363     Decode and return a dictionary of attributes. 
 365         'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
', 
 366         'empty
': '', 'noval
': None, 'entity
': '&', 
 367         'sq
': '"', 'dq': '\'' 
 369     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, 
 370     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
 372     parser = HTMLAttributeParser() 
 373     parser.feed(html_element) 
 378 def clean_html(html): 
 379     """Clean an HTML snippet into a readable string""" 
 381     if html is None:  # Convenience for sanitizing descriptions etc. 
 385     html = html.replace('\n', ' ') 
 386     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) 
 387     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) 
 389     html = re.sub('<.*?>', '', html) 
 390     # Replace html entities 
 391     html = unescapeHTML(html) 
 395 def sanitize_open(filename, open_mode): 
 396     """Try to open the given filename, and slightly tweak it if this fails. 
 398     Attempts to open the given filename. If this fails, it tries to change 
 399     the filename slightly, step by step, until it's either able to open it 
 400     or it fails and raises a final exception, like the standard open() 
 403     It returns the tuple (stream, definitive_file_name). 
 407             if sys.platform == 'win32': 
 409                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) 
 410             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) 
 411         stream = open(encodeFilename(filename), open_mode) 
 412         return (stream, filename) 
 413     except (IOError, OSError) as err: 
 414         if err.errno in (errno.EACCES,): 
 417         # In case of error, try to remove win32 forbidden chars 
 418         alt_filename = sanitize_path(filename) 
 419         if alt_filename == filename: 
 422             # An exception here should be caught in the caller 
 423             stream = open(encodeFilename(alt_filename), open_mode) 
 424             return (stream, alt_filename) 
 427 def timeconvert(timestr): 
 428     """Convert RFC 2822 defined time string into system timestamp""" 
 430     timetuple = email.utils.parsedate_tz(timestr) 
 431     if timetuple is not None: 
 432         timestamp = email.utils.mktime_tz(timetuple) 
 436 def sanitize_filename(s, restricted=False, is_id=False): 
 437     """Sanitizes a string so it could be used as part of a filename. 
 438     If restricted is set, use a stricter subset of allowed characters. 
 439     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible 
 441     def replace_insane(char): 
 442         if restricted and char in ACCENT_CHARS: 
 443             return ACCENT_CHARS[char] 
 444         if char == '?' or ord(char) < 32 or ord(char) == 127: 
 447             return '' if restricted else '\'' 
 449             return '_
-' if restricted else ' -' 
 450         elif char in '\\/|
*<>': 
 452         if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()): 
 454         if restricted 
and ord(char
) > 127: 
 459     s 
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) 
 460     result 
= ''.join(map(replace_insane
, s
)) 
 462         while '__' in result
: 
 463             result 
= result
.replace('__', '_') 
 464         result 
= result
.strip('_') 
 465         # Common case of "Foreign band name - English song title" 
 466         if restricted 
and result
.startswith('-_'): 
 468         if result
.startswith('-'): 
 469             result 
= '_' + result
[len('-'):] 
 470         result 
= result
.lstrip('.') 
 476 def sanitize_path(s
): 
 477     """Sanitizes and normalizes path on Windows""" 
 478     if sys
.platform 
!= 'win32': 
 480     drive_or_unc
, _ 
= os
.path
.splitdrive(s
) 
 481     if sys
.version_info 
< (2, 7) and not drive_or_unc
: 
 482         drive_or_unc
, _ 
= os
.path
.splitunc(s
) 
 483     norm_path 
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
) 
 487         path_part 
if path_part 
in ['.', '..'] else re
.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part
) 
 488         for path_part 
in norm_path
] 
 490         sanitized_path
.insert(0, drive_or_unc 
+ os
.path
.sep
) 
 491     return os
.path
.join(*sanitized_path
) 
 494 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of 
 495 # unwanted failures due to missing protocol 
 496 def sanitize_url(url
): 
 497     return 'http:%s' % url 
if url
.startswith('//') else url
 
 500 def sanitized_Request(url
, *args
, **kwargs
): 
 501     return compat_urllib_request
.Request(sanitize_url(url
), *args
, **kwargs
) 
 504 def orderedSet(iterable
): 
 505     """ Remove all duplicates from the input iterable """ 
 513 def _htmlentity_transform(entity_with_semicolon
): 
 514     """Transforms an HTML entity to a character.""" 
 515     entity 
= entity_with_semicolon
[:-1] 
 517     # Known non-numeric HTML entity 
 518     if entity 
in compat_html_entities
.name2codepoint
: 
 519         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 521     # TODO: HTML5 allows entities without a semicolon. For example, 
 522     # 'Éric' should be decoded as 'Éric'. 
 523     if entity_with_semicolon 
in compat_html_entities_html5
: 
 524         return compat_html_entities_html5
[entity_with_semicolon
] 
 526     mobj 
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
) 
 528         numstr 
= mobj
.group(1) 
 529         if numstr
.startswith('x'): 
 531             numstr 
= '0%s' % numstr
 
 534         # See https://github.com/rg3/youtube-dl/issues/7518 
 536             return compat_chr(int(numstr
, base
)) 
 540     # Unknown entity in name, return its literal representation 
 541     return '&%s;' % entity
 
 547     assert type(s
) == compat_str
 
 550         r
'&([^;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 553 def get_subprocess_encoding(): 
 554     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 555         # For subprocess calls, encode with locale encoding 
 556         # Refer to http://stackoverflow.com/a/9951851/35070 
 557         encoding 
= preferredencoding() 
 559         encoding 
= sys
.getfilesystemencoding() 
 565 def encodeFilename(s
, for_subprocess
=False): 
 567     @param s The name of the file 
 570     assert type(s
) == compat_str
 
 572     # Python 3 has a Unicode API 
 573     if sys
.version_info 
>= (3, 0): 
 576     # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 577     # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 578     # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 579     if not for_subprocess 
and sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 582     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible 
 583     if sys
.platform
.startswith('java'): 
 586     return s
.encode(get_subprocess_encoding(), 'ignore') 
 589 def decodeFilename(b
, for_subprocess
=False): 
 591     if sys
.version_info 
>= (3, 0): 
 594     if not isinstance(b
, bytes): 
 597     return b
.decode(get_subprocess_encoding(), 'ignore') 
 600 def encodeArgument(s
): 
 601     if not isinstance(s
, compat_str
): 
 602         # Legacy code that uses byte strings 
 603         # Uncomment the following line after fixing all post processors 
 604         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 605         s 
= s
.decode('ascii') 
 606     return encodeFilename(s
, True) 
 609 def decodeArgument(b
): 
 610     return decodeFilename(b
, True) 
 613 def decodeOption(optval
): 
 616     if isinstance(optval
, bytes): 
 617         optval 
= optval
.decode(preferredencoding()) 
 619     assert isinstance(optval
, compat_str
) 
 623 def formatSeconds(secs
): 
 625         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 627         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 632 def make_HTTPS_handler(params
, **kwargs
): 
 633     opts_no_check_certificate 
= params
.get('nocheckcertificate', False) 
 634     if hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 or 2.7.9 
 635         context 
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
) 
 636         if opts_no_check_certificate
: 
 637             context
.check_hostname 
= False 
 638             context
.verify_mode 
= ssl
.CERT_NONE
 
 640             return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 643             # (create_default_context present but HTTPSHandler has no context=) 
 646     if sys
.version_info 
< (3, 2): 
 647         return YoutubeDLHTTPSHandler(params
, **kwargs
) 
 649         context 
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
) 
 650         context
.verify_mode 
= (ssl
.CERT_NONE
 
 651                                if opts_no_check_certificate
 
 652                                else ssl
.CERT_REQUIRED
) 
 653         context
.set_default_verify_paths() 
 654         return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
) 
 657 def bug_reports_message(): 
 658     if ytdl_is_updateable(): 
 659         update_cmd 
= 'type  youtube-dl -U  to update' 
 661         update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 662     msg 
= '; please report this issue on https://yt-dl.org/bug .' 
 663     msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 664     msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
 668 class ExtractorError(Exception): 
 669     """Error during info extraction.""" 
 671     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 672         """ tb, if given, is the original traceback (so that it can be printed out). 
 673         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 676         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 678         if video_id 
is not None: 
 679             msg 
= video_id 
+ ': ' + msg
 
 681             msg 
+= ' (caused by %r)' % cause
 
 683             msg 
+= bug_reports_message() 
 684         super(ExtractorError
, self
).__init
__(msg
) 
 687         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 689         self
.video_id 
= video_id
 
 691     def format_traceback(self
): 
 692         if self
.traceback 
is None: 
 694         return ''.join(traceback
.format_tb(self
.traceback
)) 
 697 class UnsupportedError(ExtractorError
): 
 698     def __init__(self
, url
): 
 699         super(UnsupportedError
, self
).__init
__( 
 700             'Unsupported URL: %s' % url
, expected
=True) 
 704 class RegexNotFoundError(ExtractorError
): 
 705     """Error when a regex didn't match""" 
 709 class DownloadError(Exception): 
 710     """Download Error exception. 
 712     This exception may be thrown by FileDownloader objects if they are not 
 713     configured to continue on errors. They will contain the appropriate 
 717     def __init__(self
, msg
, exc_info
=None): 
 718         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ 
 719         super(DownloadError
, self
).__init
__(msg
) 
 720         self
.exc_info 
= exc_info
 
 723 class SameFileError(Exception): 
 724     """Same File exception. 
 726     This exception will be thrown by FileDownloader objects if they detect 
 727     multiple files would have to be downloaded to the same file on disk. 
 732 class PostProcessingError(Exception): 
 733     """Post Processing exception. 
 735     This exception may be raised by PostProcessor's .run() method to 
 736     indicate an error in the postprocessing task. 
 739     def __init__(self
, msg
): 
 743 class MaxDownloadsReached(Exception): 
 744     """ --max-downloads limit has been reached. """ 
 748 class UnavailableVideoError(Exception): 
 749     """Unavailable Format exception. 
 751     This exception will be thrown when a video is requested 
 752     in a format that is not available for that video. 
 757 class ContentTooShortError(Exception): 
 758     """Content Too Short exception. 
 760     This exception may be raised by FileDownloader objects when a file they 
 761     download is too small for what the server announced first, indicating 
 762     the connection was probably interrupted. 
 765     def __init__(self
, downloaded
, expected
): 
 767         self
.downloaded 
= downloaded
 
 768         self
.expected 
= expected
 
 771 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
): 
 772     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting 
 773     # expected HTTP responses to meet HTTP/1.0 or later (see also 
 774     # https://github.com/rg3/youtube-dl/issues/6727) 
 775     if sys
.version_info 
< (3, 0): 
 776         kwargs
[b
'strict'] = True 
 777     hc 
= http_class(*args
, **kwargs
) 
 778     source_address 
= ydl_handler
._params
.get('source_address') 
 779     if source_address 
is not None: 
 780         sa 
= (source_address
, 0) 
 781         if hasattr(hc
, 'source_address'):  # Python 2.7+ 
 782             hc
.source_address 
= sa
 
 784             def _hc_connect(self
, *args
, **kwargs
): 
 785                 sock 
= compat_socket_create_connection( 
 786                     (self
.host
, self
.port
), self
.timeout
, sa
) 
 788                     self
.sock 
= ssl
.wrap_socket( 
 789                         sock
, self
.key_file
, self
.cert_file
, 
 790                         ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 793             hc
.connect 
= functools
.partial(_hc_connect
, hc
) 
 798 def handle_youtubedl_headers(headers
): 
 799     filtered_headers 
= headers
 
 801     if 'Youtubedl-no-compression' in filtered_headers
: 
 802         filtered_headers 
= dict((k
, v
) for k
, v 
in filtered_headers
.items() if k
.lower() != 'accept-encoding') 
 803         del filtered_headers
['Youtubedl-no-compression'] 
 805     return filtered_headers
 
 808 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 809     """Handler for HTTP requests and responses. 
 811     This class, when installed with an OpenerDirector, automatically adds 
 812     the standard headers to every HTTP request and handles gzipped and 
 813     deflated responses from web servers. If compression is to be avoided in 
 814     a particular request, the original request in the program code only has 
 815     to include the HTTP header "Youtubedl-no-compression", which will be 
 816     removed before making the real request. 
 818     Part of this code was copied from: 
 820     http://techknack.net/python-urllib2-handlers/ 
 822     Andrew Rowls, the author of that code, agreed to release it to the 
 826     def __init__(self
, params
, *args
, **kwargs
): 
 827         compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
) 
 828         self
._params 
= params
 
 830     def http_open(self
, req
): 
 831         conn_class 
= compat_http_client
.HTTPConnection
 
 833         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 835             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
 836             del req
.headers
['Ytdl-socks-proxy'] 
 838         return self
.do_open(functools
.partial( 
 839             _create_http_connection
, self
, conn_class
, False), 
 845             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 847             return zlib
.decompress(data
) 
 850     def addinfourl_wrapper(stream
, headers
, url
, code
): 
 851         if hasattr(compat_urllib_request
.addinfourl
, 'getcode'): 
 852             return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
) 
 853         ret 
= compat_urllib_request
.addinfourl(stream
, headers
, url
) 
 857     def http_request(self
, req
): 
 858         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not 
 859         # always respected by websites, some tend to give out URLs with non percent-encoded 
 860         # non-ASCII characters (see telemb.py, ard.py [#3412]) 
 861         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) 
 862         # To work around aforementioned issue we will replace request's original URL with 
 863         # percent-encoded one 
 864         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) 
 865         # the code of this workaround has been moved here from YoutubeDL.urlopen() 
 866         url 
= req
.get_full_url() 
 867         url_escaped 
= escape_url(url
) 
 869         # Substitute URL if any change after escaping 
 870         if url 
!= url_escaped
: 
 871             req 
= update_Request(req
, url
=url_escaped
) 
 873         for h
, v 
in std_headers
.items(): 
 874             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 
 875             # The dict keys are capitalized because of this bug by urllib 
 876             if h
.capitalize() not in req
.headers
: 
 879         req
.headers 
= handle_youtubedl_headers(req
.headers
) 
 881         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 882             # Python 2.6 is brain-dead when it comes to fragments 
 883             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 884             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 888     def http_response(self
, req
, resp
): 
 891         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 892             content 
= resp
.read() 
 893             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 895                 uncompressed 
= io
.BytesIO(gz
.read()) 
 896             except IOError as original_ioerror
: 
 897                 # There may be junk add the end of the file 
 898                 # See http://stackoverflow.com/q/4928560/35070 for details 
 899                 for i 
in range(1, 1024): 
 901                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 902                         uncompressed 
= io
.BytesIO(gz
.read()) 
 907                     raise original_ioerror
 
 908             resp 
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 909             resp
.msg 
= old_resp
.msg
 
 910             del resp
.headers
['Content-encoding'] 
 912         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 913             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
 914             resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 915             resp
.msg 
= old_resp
.msg
 
 916             del resp
.headers
['Content-encoding'] 
 917         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see 
 918         # https://github.com/rg3/youtube-dl/issues/6457). 
 919         if 300 <= resp
.code 
< 400: 
 920             location 
= resp
.headers
.get('Location') 
 922                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 
 923                 if sys
.version_info 
>= (3, 0): 
 924                     location 
= location
.encode('iso-8859-1').decode('utf-8') 
 926                     location 
= location
.decode('utf-8') 
 927                 location_escaped 
= escape_url(location
) 
 928                 if location 
!= location_escaped
: 
 929                     del resp
.headers
['Location'] 
 930                     if sys
.version_info 
< (3, 0): 
 931                         location_escaped 
= location_escaped
.encode('utf-8') 
 932                     resp
.headers
['Location'] = location_escaped
 
 935     https_request 
= http_request
 
 936     https_response 
= http_response
 
 939 def make_socks_conn_class(base_class
, socks_proxy
): 
 940     assert issubclass(base_class
, ( 
 941         compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
)) 
 943     url_components 
= compat_urlparse
.urlparse(socks_proxy
) 
 944     if url_components
.scheme
.lower() == 'socks5': 
 945         socks_type 
= ProxyType
.SOCKS5
 
 946     elif url_components
.scheme
.lower() in ('socks', 'socks4'): 
 947         socks_type 
= ProxyType
.SOCKS4
 
 948     elif url_components
.scheme
.lower() == 'socks4a': 
 949         socks_type 
= ProxyType
.SOCKS4A
 
 951     def unquote_if_non_empty(s
): 
 954         return compat_urllib_parse_unquote_plus(s
) 
 958         url_components
.hostname
, url_components
.port 
or 1080, 
 960         unquote_if_non_empty(url_components
.username
), 
 961         unquote_if_non_empty(url_components
.password
), 
 964     class SocksConnection(base_class
): 
 966             self
.sock 
= sockssocket() 
 967             self
.sock
.setproxy(*proxy_args
) 
 968             if type(self
.timeout
) in (int, float): 
 969                 self
.sock
.settimeout(self
.timeout
) 
 970             self
.sock
.connect((self
.host
, self
.port
)) 
 972             if isinstance(self
, compat_http_client
.HTTPSConnection
): 
 973                 if hasattr(self
, '_context'):  # Python > 2.6 
 974                     self
.sock 
= self
._context
.wrap_socket( 
 975                         self
.sock
, server_hostname
=self
.host
) 
 977                     self
.sock 
= ssl
.wrap_socket(self
.sock
) 
 979     return SocksConnection
 
 982 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
): 
 983     def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
): 
 984         compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
) 
 985         self
._https
_conn
_class 
= https_conn_class 
or compat_http_client
.HTTPSConnection
 
 986         self
._params 
= params
 
 988     def https_open(self
, req
): 
 990         conn_class 
= self
._https
_conn
_class
 
 992         if hasattr(self
, '_context'):  # python > 2.6 
 993             kwargs
['context'] = self
._context
 
 994         if hasattr(self
, '_check_hostname'):  # python 3.x 
 995             kwargs
['check_hostname'] = self
._check
_hostname
 
 997         socks_proxy 
= req
.headers
.get('Ytdl-socks-proxy') 
 999             conn_class 
= make_socks_conn_class(conn_class
, socks_proxy
) 
1000             del req
.headers
['Ytdl-socks-proxy'] 
1002         return self
.do_open(functools
.partial( 
1003             _create_http_connection
, self
, conn_class
, True), 
1007 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
): 
1008     def __init__(self
, cookiejar
=None): 
1009         compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
) 
1011     def http_response(self
, request
, response
): 
1012         # Python 2 will choke on next HTTP request in row if there are non-ASCII 
1013         # characters in Set-Cookie HTTP header of last response (see 
1014         # https://github.com/rg3/youtube-dl/issues/6769). 
1015         # In order to at least prevent crashing we will percent encode Set-Cookie 
1016         # header before HTTPCookieProcessor starts processing it. 
1017         # if sys.version_info < (3, 0) and response.headers: 
1018         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): 
1019         #         set_cookie = response.headers.get(set_cookie_header) 
1021         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") 
1022         #             if set_cookie != set_cookie_escaped: 
1023         #                 del response.headers[set_cookie_header] 
1024         #                 response.headers[set_cookie_header] = set_cookie_escaped 
1025         return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
) 
1027     https_request 
= compat_urllib_request
.HTTPCookieProcessor
.http_request
 
1028     https_response 
= http_response
 
def extract_timezone(date_str):
    """Split a trailing UTC offset (or literal 'Z') off *date_str*.

    Returns a ``(timezone, date_str)`` pair where ``timezone`` is a
    ``datetime.timedelta`` (zero when no offset is present) and
    ``date_str`` has the offset suffix removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        # No recognizable suffix: assume UTC, keep the string untouched.
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # Plain 'Z' (Zulu time) carries no offset.
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds have no slot in the strptime format below.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_dt = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        # Unparseable date: fall through to an implicit None.
        return None
    return calendar.timegm(utc_dt.timetuple())
def date_formats(day_first=True):
    """Return the strptime format table matching the day/month ordering."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1072 def unified_strdate(date_str
, day_first
=True): 
1073     """Return a string with the date in the format YYYYMMDD""" 
1075     if date_str 
is None: 
1079     date_str 
= date_str
.replace(',', ' ') 
1080     # Remove AM/PM + timezone 
1081     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1082     _
, date_str 
= extract_timezone(date_str
) 
1084     for expression 
in date_formats(day_first
): 
1086             upload_date 
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d') 
1089     if upload_date 
is None: 
1090         timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1093                 upload_date 
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d') 
1096     if upload_date 
is not None: 
1097         return compat_str(upload_date
) 
1100 def unified_timestamp(date_str
, day_first
=True): 
1101     if date_str 
is None: 
1104     date_str 
= date_str
.replace(',', ' ') 
1106     pm_delta 
= 12 if re
.search(r
'(?i)PM', date_str
) else 0 
1107     timezone
, date_str 
= extract_timezone(date_str
) 
1109     # Remove AM/PM + timezone 
1110     date_str 
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
) 
1112     for expression 
in date_formats(day_first
): 
1114             dt 
= datetime
.datetime
.strptime(date_str
, expression
) - timezone 
+ datetime
.timedelta(hours
=pm_delta
) 
1115             return calendar
.timegm(dt
.timetuple()) 
1118     timetuple 
= email
.utils
.parsedate_tz(date_str
) 
1120         return calendar
.timegm(timetuple
) + pm_delta 
* 3600 
1123 def determine_ext(url
, default_ext
='unknown_video'): 
1126     guess 
= url
.partition('?')[0].rpartition('.')[2] 
1127     if re
.match(r
'^[A-Za-z0-9]+$', guess
): 
1129     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download 
1130     elif guess
.rstrip('/') in KNOWN_EXTENSIONS
: 
1131         return guess
.rstrip('/') 
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive the subtitle file name ``<base>.<lang>.<format>`` for *filename*."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string literal: a bare '\d' in a plain string is a
    # DeprecationWarning on Python 3.6+ (invalid escape sequence).
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d{4})(\d{2})(\d{2})$', date_str)
    if m is None:
        # Not a compact date: hand the input back unchanged.
        return date_str
    return '%s-%s-%s' % m.groups()
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = (date_from_str(start) if start is not None
                      else datetime.datetime.min.date())
        self.end = (date_from_str(end) if end is not None
                    else datetime.datetime.max.date())
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Python 2 may hand back a byte string; decode it to text first.
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
1220 def _windows_write_string(s
, out
): 
1221     """ Returns True if the string was written using special methods, 
1222     False if it has yet to be written out.""" 
1223     # Adapted from http://stackoverflow.com/a/3259271/35070 
1226     import ctypes
.wintypes
 
1234         fileno 
= out
.fileno() 
1235     except AttributeError: 
1236         # If the output stream doesn't have a fileno, it's virtual 
1238     except io
.UnsupportedOperation
: 
1239         # Some strange Windows pseudo files? 
1241     if fileno 
not in WIN_OUTPUT_IDS
: 
1244     GetStdHandle 
= ctypes
.WINFUNCTYPE( 
1245         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
1246         (b
'GetStdHandle', ctypes
.windll
.kernel32
)) 
1247     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
1249     WriteConsoleW 
= ctypes
.WINFUNCTYPE( 
1250         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
1251         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
1252         ctypes
.wintypes
.LPVOID
)((b
'WriteConsoleW', ctypes
.windll
.kernel32
)) 
1253     written 
= ctypes
.wintypes
.DWORD(0) 
1255     GetFileType 
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
'GetFileType', ctypes
.windll
.kernel32
)) 
1256     FILE_TYPE_CHAR 
= 0x0002 
1257     FILE_TYPE_REMOTE 
= 0x8000 
1258     GetConsoleMode 
= ctypes
.WINFUNCTYPE( 
1259         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
1260         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
1261         (b
'GetConsoleMode', ctypes
.windll
.kernel32
)) 
1262     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
1264     def not_a_console(handle
): 
1265         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
1267         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR 
or 
1268                 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
1270     if not_a_console(h
): 
1273     def next_nonbmp_pos(s
): 
1275             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
1276         except StopIteration: 
1280         count 
= min(next_nonbmp_pos(s
), 1024) 
1282         ret 
= WriteConsoleW( 
1283             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
1285             raise OSError('Failed to write string') 
1286         if not count
:  # We just wrote a non-BMP character 
1287             assert written
.value 
== 2 
1290             assert written
.value 
> 0 
1291             s 
= s
[written
.value
:] 
1295 def write_string(s
, out
=None, encoding
=None): 
1298     assert type(s
) == compat_str
 
1300     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
1301         if _windows_write_string(s
, out
): 
1304     if ('b' in getattr(out
, 'mode', '') or 
1305             sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
1306         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
1308     elif hasattr(out
, 'buffer'): 
1309         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
1310         byt 
= s
.encode(enc
, 'ignore') 
1311         out
.buffer.write(byt
) 
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Python 2: indexing a str yields 1-character strings.
    return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Inverse of bytes_to_intlist: pack integer byte values into bytes."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
1332 # Cross-platform file locking 
1333 if sys
.platform 
== 'win32': 
1334     import ctypes
.wintypes
 
1337     class OVERLAPPED(ctypes
.Structure
): 
1339             ('Internal', ctypes
.wintypes
.LPVOID
), 
1340             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
1341             ('Offset', ctypes
.wintypes
.DWORD
), 
1342             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
1343             ('hEvent', ctypes
.wintypes
.HANDLE
), 
1346     kernel32 
= ctypes
.windll
.kernel32
 
1347     LockFileEx 
= kernel32
.LockFileEx
 
1348     LockFileEx
.argtypes 
= [ 
1349         ctypes
.wintypes
.HANDLE
,     # hFile 
1350         ctypes
.wintypes
.DWORD
,      # dwFlags 
1351         ctypes
.wintypes
.DWORD
,      # dwReserved 
1352         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1353         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1354         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1356     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1357     UnlockFileEx 
= kernel32
.UnlockFileEx
 
1358     UnlockFileEx
.argtypes 
= [ 
1359         ctypes
.wintypes
.HANDLE
,     # hFile 
1360         ctypes
.wintypes
.DWORD
,      # dwReserved 
1361         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
1362         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
1363         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
1365     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
1366     whole_low 
= 0xffffffff 
1367     whole_high 
= 0x7fffffff 
1369     def _lock_file(f
, exclusive
): 
1370         overlapped 
= OVERLAPPED() 
1371         overlapped
.Offset 
= 0 
1372         overlapped
.OffsetHigh 
= 0 
1373         overlapped
.hEvent 
= 0 
1374         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
1375         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1376         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
1377                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1378             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
1380     def _unlock_file(f
): 
1381         assert f
._lock
_file
_overlapped
_p
 
1382         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
1383         if not UnlockFileEx(handle
, 0, 
1384                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
1385             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
1388     # Some platforms, such as Jython, is missing fcntl 
1392         def _lock_file(f
, exclusive
): 
1393             fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
1395         def _unlock_file(f
): 
1396             fcntl
.flock(f
, fcntl
.LOCK_UN
) 
1398         UNSUPPORTED_MSG 
= 'file locking is not supported on this platform' 
1400         def _lock_file(f
, exclusive
): 
1401             raise IOError(UNSUPPORTED_MSG
) 
1403         def _unlock_file(f
): 
1404             raise IOError(UNSUPPORTED_MSG
) 
1407 class locked_file(object): 
1408     def __init__(self
, filename
, mode
, encoding
=None): 
1409         assert mode 
in ['r', 'a', 'w'] 
1410         self
.f 
= io
.open(filename
, mode
, encoding
=encoding
) 
1413     def __enter__(self
): 
1414         exclusive 
= self
.mode 
!= 'r' 
1416             _lock_file(self
.f
, exclusive
) 
1422     def __exit__(self
, etype
, value
, traceback
): 
1424             _unlock_file(self
.f
) 
1431     def write(self
, *args
): 
1432         return self
.f
.write(*args
) 
1434     def read(self
, *args
): 
1435         return self
.f
.read(*args
) 
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to 'utf-8' when unknown."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
def shell_quote(args):
    """Quote each argument for safe display as a POSIX shell command line."""
    encoding = get_filesystem_encoding()
    quoted = []
    for arg in args:
        if isinstance(arg, bytes):
            # We may get a filename encoded with 'encodeFilename'
            arg = arg.decode(encoding)
        quoted.append(pipes.quote(arg))
    return ' '.join(quoted)
1454 def smuggle_url(url
, data
): 
1455     """ Pass additional data in a URL for internal use. """ 
1457     url
, idata 
= unsmuggle_url(url
, {}) 
1459     sdata 
= compat_urllib_parse_urlencode( 
1460         {'__youtubedl_smuggle': json
.dumps(data
)}) 
1461     return url 
+ '#' + sdata
 
1464 def unsmuggle_url(smug_url
, default
=None): 
1465     if '#__youtubedl_smuggle' not in smug_url
: 
1466         return smug_url
, default
 
1467     url
, _
, sdata 
= smug_url
.rpartition('#') 
1468     jsond 
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0] 
1469     data 
= json
.loads(jsond
) 
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. 1536 -> '1.50KiB'.

    NOTE: the parameter is named ``bytes`` (shadowing the builtin) to stay
    backward compatible with callers that pass it by keyword.
    """
    if bytes is None:
        return 'N/A'
    # isinstance instead of `type(...) is str`: also accepts str subclasses.
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' out of *s* using *unit_table* (unit -> multiplier).

    Returns the scaled integer value, or None when nothing matches.
    """
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.search(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # European-style decimal commas are normalized to dots before float().
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
1498 def parse_filesize(s
): 
1502     # The lower-case forms are of course incorrect and unofficial, 
1503     # but we support those too 
1541     return lookup_unit_table(_UNIT_TABLE
, s
) 
1550     if re
.match(r
'^[\d,.]+$', s
): 
1551         return str_to_int(s
) 
1562     return lookup_unit_table(_UNIT_TABLE
, s
) 
1565 def month_by_name(name
): 
1566     """ Return the number of a month by (locale-independently) English name """ 
1569         return ENGLISH_MONTH_NAMES
.index(name
) + 1 
1574 def month_by_abbreviation(abbrev
): 
1575     """ Return the number of a month by (locale-independently) English 
1579         return [s
[:3] for s 
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1 
1584 def fix_xml_ampersands(xml_str
): 
1585     """Replace all the '&' by '&' in XML""" 
1587         r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', 
1592 def setproctitle(title
): 
1593     assert isinstance(title
, compat_str
) 
1595     # ctypes in Jython is not complete 
1596     # http://bugs.jython.org/issue2148 
1597     if sys
.platform
.startswith('java'): 
1601         libc 
= ctypes
.cdll
.LoadLibrary('libc.so.6') 
1604     title_bytes 
= title
.encode('utf-8') 
1605     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1606     buf
.value 
= title_bytes
 
1608         libc
.prctl(15, buf
, 0, 0, 0) 
1609     except AttributeError: 
1610         return  # Strange libc, just skip this 
def remove_start(s, start):
    """Return *s* without the prefix *start* (if present); None passes through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Return *s* without the suffix *end* (if present); None passes through.

    Fix: with an empty *end* the old expression sliced ``s[:-0]`` (i.e.
    ``s[:0]``) and wrongly returned '' because ``s.endswith('')`` is
    always True; an empty suffix now leaves *s* untouched.
    """
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for q in ('"', "'"):
        if s[0] == q and s[-1] == q:
            return s[1:-1]
    return s
def url_basename(url):
    """Return the last path component of *url*, e.g. '.../a/b.mp4' -> 'b.mp4'."""
    parsed_path = compat_urlparse.urlparse(url).path
    components = parsed_path.strip('/').split('/')
    return components[-1]
1635 class HEADRequest(compat_urllib_request
.Request
): 
1636     def get_method(self
): 
1640 class PUTRequest(compat_urllib_request
.Request
): 
1641     def get_method(self
): 
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (optionally reading attribute *get_attr* first),
    scaled by invscale/scale; return *default* when conversion fails."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v in (None, ''):
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Return compat_str(v), or *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Drop thousands separators (',' or '.') and '+' decorations.
    return int(re.sub(r'[,\.\+]', '', int_str))
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float scaled by invscale/scale; *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def strip_or_none(v):
    """Return v.strip(), passing None through untouched."""
    if v is None:
        return None
    return v.strip()
1684 def parse_duration(s
): 
1685     if not isinstance(s
, compat_basestring
): 
1690     days
, hours
, mins
, secs
, ms 
= [None] * 5 
1691     m 
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s
) 
1693         days
, hours
, mins
, secs
, ms 
= m
.groups() 
1698                     (?P<days>[0-9]+)\s*d(?:ays?)?\s* 
1701                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s* 
1704                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* 
1707                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* 
1710             days
, hours
, mins
, secs
, ms 
= m
.groups() 
1712             m 
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s
) 
1714                 hours
, mins 
= m
.groups() 
1720         duration 
+= float(secs
) 
1722         duration 
+= float(mins
) * 60 
1724         duration 
+= float(hours
) * 60 * 60 
1726         duration 
+= float(days
) * 24 * 60 * 60 
1728         duration 
+= float(ms
) 
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension: 'a.mp4' + 'temp' -> 'a.temp.mp4'.

    When *expected_real_ext* is given but does not match the actual
    extension, *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the filename's extension with *ext*: 'a.mp4' + 'temp' -> 'a.temp'.

    When *expected_real_ext* is given but does not match the actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
1747 def check_executable(exe
, args
=[]): 
1748     """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
1749     args can be a list of arguments for a short output (like -version) """ 
1751         subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate() 
1757 def get_exe_version(exe
, args
=['--version'], 
1758                     version_re
=None, unrecognized
='present'): 
1759     """ Returns the version of the specified executable, 
1760     or False if the executable is not present """ 
1762         out
, _ 
= subprocess
.Popen( 
1763             [encodeArgument(exe
)] + args
, 
1764             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1767     if isinstance(out
, bytes):  # Python 2.x 
1768         out 
= out
.decode('ascii', 'ignore') 
1769     return detect_exe_version(out
, version_re
, unrecognized
) 
1772 def detect_exe_version(output
, version_re
=None, unrecognized
='present'): 
1773     assert isinstance(output
, compat_str
) 
1774     if version_re 
is None: 
1775         version_re 
= r
'version\s+([-0-9._a-zA-Z]+)' 
1776     m 
= re
.search(version_re
, output
) 
1783 class PagedList(object): 
1785         # This is only useful for tests 
1786         return len(self
.getslice()) 
1789 class OnDemandPagedList(PagedList
): 
1790     def __init__(self
, pagefunc
, pagesize
, use_cache
=False): 
1791         self
._pagefunc 
= pagefunc
 
1792         self
._pagesize 
= pagesize
 
1793         self
._use
_cache 
= use_cache
 
1797     def getslice(self
, start
=0, end
=None): 
1799         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
1800             firstid 
= pagenum 
* self
._pagesize
 
1801             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
1802             if start 
>= nextfirstid
: 
1807                 page_results 
= self
._cache
.get(pagenum
) 
1808             if page_results 
is None: 
1809                 page_results 
= list(self
._pagefunc
(pagenum
)) 
1811                 self
._cache
[pagenum
] = page_results
 
1814                 start 
% self
._pagesize
 
1815                 if firstid 
<= start 
< nextfirstid
 
1819                 ((end 
- 1) % self
._pagesize
) + 1 
1820                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
1823             if startv 
!= 0 or endv 
is not None: 
1824                 page_results 
= page_results
[startv
:endv
] 
1825             res
.extend(page_results
) 
1827             # A little optimization - if current page is not "full", ie. does 
1828             # not contain page_size videos then we can assume that this page 
1829             # is the last one - there are no more ids on further pages - 
1830             # i.e. no need to query again. 
1831             if len(page_results
) + startv 
< self
._pagesize
: 
1834             # If we got the whole page, but the next page is not interesting, 
1835             # break out early as well 
1836             if end 
== nextfirstid
: 
1841 class InAdvancePagedList(PagedList
): 
1842     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
1843         self
._pagefunc 
= pagefunc
 
1844         self
._pagecount 
= pagecount
 
1845         self
._pagesize 
= pagesize
 
1847     def getslice(self
, start
=0, end
=None): 
1849         start_page 
= start 
// self
._pagesize
 
1851             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
1852         skip_elems 
= start 
- start_page 
* self
._pagesize
 
1853         only_more 
= None if end 
is None else end 
- start
 
1854         for pagenum 
in range(start_page
, end_page
): 
1855             page 
= list(self
._pagefunc
(pagenum
)) 
1857                 page 
= page
[skip_elems
:] 
1859             if only_more 
is not None: 
1860                 if len(page
) < only_more
: 
1861                     only_more 
-= len(page
) 
1863                     page 
= page
[:only_more
] 
def uppercase_escape(s):
    """Decode embedded \\UXXXXXXXX escape sequences in *s*."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(m):
        return decode(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
def lowercase_escape(s):
    """Decode embedded \\uXXXX escape sequences in *s*."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(m):
        return decode(m.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
1886 def escape_rfc3986(s
): 
1887     """Escape non-ASCII characters as suggested by RFC 3986""" 
1888     if sys
.version_info 
< (3, 0) and isinstance(s
, compat_str
): 
1889         s 
= s
.encode('utf-8') 
1890     return compat_urllib_parse
.quote(s
, b
"%/;:@&=+$,!~*'()?#[]") 
1893 def escape_url(url
): 
1894     """Escape URL as suggested by RFC 3986""" 
1895     url_parsed 
= compat_urllib_parse_urlparse(url
) 
1896     return url_parsed
._replace
( 
1897         netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'), 
1898         path
=escape_rfc3986(url_parsed
.path
), 
1899         params
=escape_rfc3986(url_parsed
.params
), 
1900         query
=escape_rfc3986(url_parsed
.query
), 
1901         fragment
=escape_rfc3986(url_parsed
.fragment
) 
1905 def read_batch_urls(batch_fd
): 
1907         if not isinstance(url
, compat_str
): 
1908             url 
= url
.decode('utf-8', 'replace') 
1909         BOM_UTF8 
= '\xef\xbb\xbf' 
1910         if url
.startswith(BOM_UTF8
): 
1911             url 
= url
[len(BOM_UTF8
):] 
1913         if url
.startswith(('#', ';', ']')): 
1917     with contextlib
.closing(batch_fd
) as fd
: 
1918         return [url 
for url 
in map(fixup
, fd
) if url
] 
def urlencode_postdata(*args, **kargs):
    """URL-encode a query and return it as ASCII bytes for use as a POST body."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
1925 def update_url_query(url
, query
): 
1928     parsed_url 
= compat_urlparse
.urlparse(url
) 
1929     qs 
= compat_parse_qs(parsed_url
.query
) 
1931     return compat_urlparse
.urlunparse(parsed_url
._replace
( 
1932         query
=compat_urllib_parse_urlencode(qs
, True))) 
1935 def update_Request(req
, url
=None, data
=None, headers
={}, query
={}): 
1936     req_headers 
= req
.headers
.copy() 
1937     req_headers
.update(headers
) 
1938     req_data 
= data 
or req
.data
 
1939     req_url 
= update_url_query(url 
or req
.get_full_url(), query
) 
1940     req_get_method 
= req
.get_method() 
1941     if req_get_method 
== 'HEAD': 
1942         req_type 
= HEADRequest
 
1943     elif req_get_method 
== 'PUT': 
1944         req_type 
= PUTRequest
 
1946         req_type 
= compat_urllib_request
.Request
 
1948         req_url
, data
=req_data
, headers
=req_headers
, 
1949         origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
) 
1950     if hasattr(req
, 'timeout'): 
1951         new_req
.timeout 
= req
.timeout
 
1955 def dict_get(d
, key_or_keys
, default
=None, skip_false_values
=True): 
1956     if isinstance(key_or_keys
, (list, tuple)): 
1957         for key 
in key_or_keys
: 
1958             if key 
not in d 
or d
[key
] is None or skip_false_values 
and not d
[key
]: 
1962     return d
.get(key_or_keys
, default
) 
1965 def try_get(src
, getter
, expected_type
=None): 
1968     except (AttributeError, KeyError, TypeError, IndexError): 
1971         if expected_type 
is None or isinstance(v
, expected_type
): 
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding byte strings with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
1988 TV_PARENTAL_GUIDELINES 
= { 
1998 def parse_age_limit(s
): 
2000         return s 
if 0 <= s 
<= 21 else None 
2001     if not isinstance(s
, compat_basestring
): 
2003     m 
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
) 
2005         return int(m
.group('age')) 
2007         return US_RATINGS
[s
] 
2008     return TV_PARENTAL_GUIDELINES
.get(s
) 
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, returning the bare JSON payload."""
    jsonp_re = r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_re, r'\1', code)
2016 def js_to_json(code
): 
2019         if v 
in ('true', 'false', 'null'): 
2021         elif v
.startswith('/*') or v 
== ',': 
2024         if v
[0] in ("'", '"'): 
2025             v 
= re
.sub(r
'(?s)\\.|"', lambda m
: { 
2030             }.get(m
.group(0), m
.group(0)), v
[1:-1]) 
2033             (r
'^0[xX][0-9a-fA-F]+', 16), 
2037         for regex
, base 
in INTEGER_TABLE
: 
2038             im 
= re
.match(regex
, v
) 
2040                 i 
= int(im
.group(0), base
) 
2041                 return '"%d":' % i 
if v
.endswith(':') else '%d' % i
 
2045     return re
.sub(r
'''(?sx) 
2046         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| 
2047         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| 
2048         /\*.*?\*/|,(?=\s*[\]}])| 
2049         [a-zA-Z_][.a-zA-Z_0-9]*| 
2050         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| 
2055 def qualities(quality_ids
): 
2056     """ Get a numeric quality value out of a list of possible values """ 
2059             return quality_ids
.index(qid
) 
2065 DEFAULT_OUTTMPL 
= '%(title)s-%(id)s.%(ext)s' 
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
def version_tuple(v):
    """Split a version string on '.' or '-' into a tuple of ints."""
    return tuple(int(piece) for piece in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; when *version* is empty or unparseable,
    fall back to ``not assume_new``."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Render an exception as a text string, decoding on Python 2."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
2112 def mimetype2ext(mt
): 
2118         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as 
2119         # it's the most popular one 
2120         'audio/mpeg': 'mp3', 
2125     _
, _
, res 
= mt
.rpartition('/') 
2130         'smptett+xml': 'tt', 
2136         'x-mp4-fragmented': 'mp4', 
2139         'x-mpegurl': 'm3u8', 
2140         'vnd.apple.mpegurl': 'm3u8', 
2145         'vnd.ms-sstr+xml': 'ism', 
2149 def parse_codecs(codecs_str
): 
2150     # http://tools.ietf.org/html/rfc6381 
2153     splited_codecs 
= list(filter(None, map( 
2154         lambda str: str.strip(), codecs_str
.strip().strip(',').split(',')))) 
2155     vcodec
, acodec 
= None, None 
2156     for full_codec 
in splited_codecs
: 
2157         codec 
= full_codec
.split('.')[0] 
2158         if codec 
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): 
2161         elif codec 
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'): 
2165             write_string('WARNING: Unknown codec %s' % full_codec
, sys
.stderr
) 
2166     if not vcodec 
and not acodec
: 
2167         if len(splited_codecs
) == 2: 
2172         elif len(splited_codecs
) == 1: 
2179             'vcodec': vcodec 
or 'none', 
2180             'acodec': acodec 
or 'none', 
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a urllib response object.

    Prefers the filename from Content-Disposition; falls back to mapping
    the Content-Type MIME type to an extension.
    """
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI carrying *data* (bytes) as base64."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Known byte-order marks and the encoding each one implies.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        # No BOM: assume UTF-8, replacing undecodable bytes.
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Work out the download protocol for an info dict: explicit 'protocol'
    field first, then URL scheme prefixes, then extension, then URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Widest cell per column decides that column's width.
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    # Left-align every column but the last, padded one wider than its content.
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
2263 def _match_one(filter_part
, dct
): 
2264     COMPARISON_OPERATORS 
= { 
2272     operator_rex 
= re
.compile(r
'''(?x)\s* 
2274         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* 
2276             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| 
2277             (?P<strval>(?![0-9.])[a-z0-9A-Z]*) 
2280         ''' % '|'.join(map(re
.escape
, COMPARISON_OPERATORS
.keys()))) 
2281     m 
= operator_rex
.search(filter_part
) 
2283         op 
= COMPARISON_OPERATORS
[m
.group('op')] 
2284         if m
.group('strval') is not None: 
2285             if m
.group('op') not in ('=', '!='): 
2287                     'Operator %s does not support string values!' % m
.group('op')) 
2288             comparison_value 
= m
.group('strval') 
2291                 comparison_value 
= int(m
.group('intval')) 
2293                 comparison_value 
= parse_filesize(m
.group('intval')) 
2294                 if comparison_value 
is None: 
2295                     comparison_value 
= parse_filesize(m
.group('intval') + 'B') 
2296                 if comparison_value 
is None: 
2298                         'Invalid integer value %r in filter part %r' % ( 
2299                             m
.group('intval'), filter_part
)) 
2300         actual_value 
= dct
.get(m
.group('key')) 
2301         if actual_value 
is None: 
2302             return m
.group('none_inclusive') 
2303         return op(actual_value
, comparison_value
) 
2306         '': lambda v
: v 
is not None, 
2307         '!': lambda v
: v 
is None, 
2309     operator_rex 
= re
.compile(r
'''(?x)\s* 
2310         (?P<op>%s)\s*(?P<key>[a-z_]+) 
2312         ''' % '|'.join(map(re
.escape
, UNARY_OPERATORS
.keys()))) 
2313     m 
= operator_rex
.search(filter_part
) 
2315         op 
= UNARY_OPERATORS
[m
.group('op')] 
2316         actual_value 
= dct
.get(m
.group('key')) 
2317         return op(actual_value
) 
2319     raise ValueError('Invalid filter part %r' % filter_part
) 
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # '&' joins sub-filters; all of them must hold.
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None when the video passes,
    or a human-readable skip message when it does not."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression to seconds (float), or None."""
    if not time_expr:
        return

    # Plain offset form: '12.345' or '12.345s'
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock form: HH:MM:SS(.mmm) — a ':' before the fraction means frames,
    # which are treated as a decimal fraction here.
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a float seconds value as an SRT timecode HH:MM:SS,mmm."""
    return '%02d:%02d:%02d,%03d' % (
        seconds / 3600,
        (seconds % 3600) / 60,
        seconds % 60,
        (seconds % 1) * 1000)
2356 def dfxp2srt(dfxp_data
): 
2357     _x 
= functools
.partial(xpath_with_ns
, ns_map
={ 
2358         'ttml': 'http://www.w3.org/ns/ttml', 
2359         'ttaf1': 'http://www.w3.org/2006/10/ttaf1', 
2360         'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1', 
2363     class TTMLPElementParser(object): 
2366         def start(self
, tag
, attrib
): 
2367             if tag 
in (_x('ttml:br'), _x('ttaf1:br'), 'br'): 
2373         def data(self
, data
): 
2377             return self
.out
.strip() 
2379     def parse_node(node
): 
2380         target 
= TTMLPElementParser() 
2381         parser 
= xml
.etree
.ElementTree
.XMLParser(target
=target
) 
2382         parser
.feed(xml
.etree
.ElementTree
.tostring(node
)) 
2383         return parser
.close() 
2385     dfxp 
= compat_etree_fromstring(dfxp_data
.encode('utf-8')) 
2387     paras 
= dfxp
.findall(_x('.//ttml:p')) or dfxp
.findall(_x('.//ttaf1:p')) or dfxp
.findall(_x('.//ttaf1_0604:p')) or dfxp
.findall('.//p') 
2390         raise ValueError('Invalid dfxp/TTML subtitle') 
2392     for para
, index 
in zip(paras
, itertools
.count(1)): 
2393         begin_time 
= parse_dfxp_time_expr(para
.attrib
.get('begin')) 
2394         end_time 
= parse_dfxp_time_expr(para
.attrib
.get('end')) 
2395         dur 
= parse_dfxp_time_expr(para
.attrib
.get('dur')) 
2396         if begin_time 
is None: 
2401             end_time 
= begin_time 
+ dur
 
2402         out
.append('%d\n%s --> %s\n%s\n\n' % ( 
2404             srt_subtitles_timecode(begin_time
), 
2405             srt_subtitles_timecode(end_time
), 
def cli_option(params, command_option, param):
    """Turn an option value from *params* into ['--opt', value], or [] if unset."""
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option either as ['--opt', 'true'] or, with a
    separator, as a single ['--opt=true'] style argument."""
    param = params.get(param)
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare flag (no value) when the param equals *expected_value*."""
    param = params.get(param)
    return [command_option] if param == expected_value else []
def cli_configuration_args(params, param, default=[]):
    """Fetch a list of extra CLI args from *params*, or *default* when unset.

    NOTE(review): the mutable default is returned as-is; callers must not
    mutate the result when relying on the default.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
2439 class ISO639Utils(object): 
2440     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 
2629     def short2long(cls
, code
): 
2630         """Convert language code from ISO 639-1 to ISO 639-2/T""" 
2631         return cls
._lang
_map
.get(code
[:2]) 
2634     def long2short(cls
, code
): 
2635         """Convert language code from ISO 639-2/T to ISO 639-1""" 
2636         for short_name
, long_name 
in cls
._lang
_map
.items(): 
2637             if long_name 
== code
: 
2641 class ISO3166Utils(object): 
2642     # From http://data.okfn.org/data/core/country-list 
2644         'AF': 'Afghanistan', 
2645         'AX': 'Åland Islands', 
2648         'AS': 'American Samoa', 
2653         'AG': 'Antigua and Barbuda', 
2670         'BO': 'Bolivia, Plurinational State of', 
2671         'BQ': 'Bonaire, Sint Eustatius and Saba', 
2672         'BA': 'Bosnia and Herzegovina', 
2674         'BV': 'Bouvet Island', 
2676         'IO': 'British Indian Ocean Territory', 
2677         'BN': 'Brunei Darussalam', 
2679         'BF': 'Burkina Faso', 
2685         'KY': 'Cayman Islands', 
2686         'CF': 'Central African Republic', 
2690         'CX': 'Christmas Island', 
2691         'CC': 'Cocos (Keeling) Islands', 
2695         'CD': 'Congo, the Democratic Republic of the', 
2696         'CK': 'Cook Islands', 
2698         'CI': 'Côte d\'Ivoire', 
2703         'CZ': 'Czech Republic', 
2707         'DO': 'Dominican Republic', 
2710         'SV': 'El Salvador', 
2711         'GQ': 'Equatorial Guinea', 
2715         'FK': 'Falkland Islands (Malvinas)', 
2716         'FO': 'Faroe Islands', 
2720         'GF': 'French Guiana', 
2721         'PF': 'French Polynesia', 
2722         'TF': 'French Southern Territories', 
2737         'GW': 'Guinea-Bissau', 
2740         'HM': 'Heard Island and McDonald Islands', 
2741         'VA': 'Holy See (Vatican City State)', 
2748         'IR': 'Iran, Islamic Republic of', 
2751         'IM': 'Isle of Man', 
2761         'KP': 'Korea, Democratic People\'s Republic of', 
2762         'KR': 'Korea, Republic of', 
2765         'LA': 'Lao People\'s Democratic Republic', 
2771         'LI': 'Liechtenstein', 
2775         'MK': 'Macedonia, the Former Yugoslav Republic of', 
2782         'MH': 'Marshall Islands', 
2788         'FM': 'Micronesia, Federated States of', 
2789         'MD': 'Moldova, Republic of', 
2800         'NL': 'Netherlands', 
2801         'NC': 'New Caledonia', 
2802         'NZ': 'New Zealand', 
2807         'NF': 'Norfolk Island', 
2808         'MP': 'Northern Mariana Islands', 
2813         'PS': 'Palestine, State of', 
2815         'PG': 'Papua New Guinea', 
2818         'PH': 'Philippines', 
2822         'PR': 'Puerto Rico', 
2826         'RU': 'Russian Federation', 
2828         'BL': 'Saint Barthélemy', 
2829         'SH': 'Saint Helena, Ascension and Tristan da Cunha', 
2830         'KN': 'Saint Kitts and Nevis', 
2831         'LC': 'Saint Lucia', 
2832         'MF': 'Saint Martin (French part)', 
2833         'PM': 'Saint Pierre and Miquelon', 
2834         'VC': 'Saint Vincent and the Grenadines', 
2837         'ST': 'Sao Tome and Principe', 
2838         'SA': 'Saudi Arabia', 
2842         'SL': 'Sierra Leone', 
2844         'SX': 'Sint Maarten (Dutch part)', 
2847         'SB': 'Solomon Islands', 
2849         'ZA': 'South Africa', 
2850         'GS': 'South Georgia and the South Sandwich Islands', 
2851         'SS': 'South Sudan', 
2856         'SJ': 'Svalbard and Jan Mayen', 
2859         'CH': 'Switzerland', 
2860         'SY': 'Syrian Arab Republic', 
2861         'TW': 'Taiwan, Province of China', 
2863         'TZ': 'Tanzania, United Republic of', 
2865         'TL': 'Timor-Leste', 
2869         'TT': 'Trinidad and Tobago', 
2872         'TM': 'Turkmenistan', 
2873         'TC': 'Turks and Caicos Islands', 
2877         'AE': 'United Arab Emirates', 
2878         'GB': 'United Kingdom', 
2879         'US': 'United States', 
2880         'UM': 'United States Minor Outlying Islands', 
2884         'VE': 'Venezuela, Bolivarian Republic of', 
2886         'VG': 'Virgin Islands, British', 
2887         'VI': 'Virgin Islands, U.S.', 
2888         'WF': 'Wallis and Futuna', 
2889         'EH': 'Western Sahara', 
2896     def short2full(cls
, code
): 
2897         """Convert an ISO 3166-2 country code to the corresponding full name""" 
2898         return cls
._country
_map
.get(code
.upper()) 
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header,
    with SOCKS proxies delegated to the http/https handlers via a
    'Ytdl-socks-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # Reverse the byte order before converting to an integer, per the
    # little-endian convention of the original JS implementation.
    payload = int(binascii.hexlify(data[::-1]), 16)
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
def encode_base_n(num, n, table=None):
    """Encode non-negative integer *num* in base *n* using *table* as the
    digit alphabet (default: 0-9a-zA-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
def decode_packed_codes(code):
    """Decode JavaScript obfuscated with Dean Edwards' p.a.c.k.e.r."""
    mobj = re.search(
        r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
        code)
    obfucasted_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each base-n token back to its original symbol; empty entries
    # mean the token stands for itself.
    symbol_table = {}
    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfucasted_code)
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list string into a dict, unquoting values."""
    info = {}
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator)."""
    if val >= 0:
        return val >> n
    # Negative values: reinterpret as unsigned 32-bit before shifting.
    return (val + 0x100000000) >> n
2994 # Based on png2str() written by @gdkchan and improved by @yokrysty 
2995 # Originally posted at https://github.com/rg3/youtube-dl/issues/9706 
2996 def decode_png(png_data
): 
2997     # Reference: https://www.w3.org/TR/PNG/ 
2998     header 
= png_data
[8:] 
3000     if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR': 
3001         raise IOError('Not a valid PNG file.') 
3003     int_map 
= {1: '>B', 2: '>H', 4: '>I'} 
3004     unpack_integer 
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0] 
3009         length 
= unpack_integer(header
[:4]) 
3012         chunk_type 
= header
[:4] 
3015         chunk_data 
= header
[:length
] 
3016         header 
= header
[length
:] 
3018         header 
= header
[4:]  # Skip CRC 
3026     ihdr 
= chunks
[0]['data'] 
3028     width 
= unpack_integer(ihdr
[:4]) 
3029     height 
= unpack_integer(ihdr
[4:8]) 
3033     for chunk 
in chunks
: 
3034         if chunk
['type'] == b
'IDAT': 
3035             idat 
+= chunk
['data'] 
3038         raise IOError('Unable to read PNG data.') 
3040     decompressed_data 
= bytearray(zlib
.decompress(idat
)) 
3045     def _get_pixel(idx
): 
3050     for y 
in range(height
): 
3051         basePos 
= y 
* (1 + stride
) 
3052         filter_type 
= decompressed_data
[basePos
] 
3056         pixels
.append(current_row
) 
3058         for x 
in range(stride
): 
3059             color 
= decompressed_data
[1 + basePos 
+ x
] 
3060             basex 
= y 
* stride 
+ x
 
3065                 left 
= _get_pixel(basex 
- 3) 
3067                 up 
= _get_pixel(basex 
- stride
) 
3069             if filter_type 
== 1:  # Sub 
3070                 color 
= (color 
+ left
) & 0xff 
3071             elif filter_type 
== 2:  # Up 
3072                 color 
= (color 
+ up
) & 0xff 
3073             elif filter_type 
== 3:  # Average 
3074                 color 
= (color 
+ ((left 
+ up
) >> 1)) & 0xff 
3075             elif filter_type 
== 4:  # Paeth 
3081                     c 
= _get_pixel(basex 
- stride 
- 3) 
3089                 if pa 
<= pb 
and pa 
<= pc
: 
3090                     color 
= (color 
+ a
) & 0xff 
3092                     color 
= (color 
+ b
) & 0xff 
3094                     color 
= (color 
+ c
) & 0xff 
3096             current_row
.append(color
) 
3098     return width
, height
, pixels