2 # -*- coding: utf-8 -*-
4 from __future__
import unicode_literals
33 import xml
.etree
.ElementTree
40 compat_etree_fromstring
,
42 compat_html_entities_html5
,
47 compat_socket_create_connection
,
52 compat_urllib_parse_urlencode
,
53 compat_urllib_parse_urlparse
,
54 compat_urllib_parse_unquote_plus
,
55 compat_urllib_request
,
def register_socks_protocols():
    """Make urlparse treat the SOCKS schemes as netloc-bearing.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose protocol is missing from
    urlparse.uses_netloc are not handled correctly, so "register" ours.
    """
    known_schemes = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in known_schemes:
            known_schemes.append(scheme)
# This is not clearly defined otherwise: the stdlib exposes no stable public
# name for the compiled-pattern type here, so derive it from a real instance.
compiled_regex_type = type(re.compile(''))
79 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
80 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
81 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
82 'Accept-Encoding': 'gzip, deflate',
83 'Accept-Language': 'en-us,en;q=0.5',
# Calendar month names as they appear in English-language date strings.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December']
94 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
95 'flv', 'f4v', 'f4a', 'f4b',
96 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
106 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode: maps accented Latin
# characters onto plain-ASCII replacements (multi-char for ligatures/ß).
ACCENT_CHARS = {
    accented: plain
    for accented, plain in zip(
        'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))}
114 def preferredencoding():
115 """Get preferred encoding.
117 Returns the best encoding scheme for the system, based on
118 locale.getpreferredencoding() and some further tweaks.
121 pref
= locale
.getpreferredencoding()
129 def write_json_file(obj
, fn
):
130 """ Encode obj as JSON and write it to fn, atomically if possible """
132 fn
= encodeFilename(fn
)
133 if sys
.version_info
< (3, 0) and sys
.platform
!= 'win32':
134 encoding
= get_filesystem_encoding()
135 # os.path.basename returns a bytes object, but NamedTemporaryFile
136 # will fail if the filename contains non ascii characters unless we
137 # use a unicode object
138 path_basename
= lambda f
: os
.path
.basename(fn
).decode(encoding
)
139 # the same for os.path.dirname
140 path_dirname
= lambda f
: os
.path
.dirname(fn
).decode(encoding
)
142 path_basename
= os
.path
.basename
143 path_dirname
= os
.path
.dirname
147 'prefix': path_basename(fn
) + '.',
148 'dir': path_dirname(fn
),
152 # In Python 2.x, json.dump expects a bytestream.
153 # In Python 3.x, it writes to a character stream
154 if sys
.version_info
< (3, 0):
162 tf
= tempfile
.NamedTemporaryFile(**compat_kwargs(args
))
167 if sys
.platform
== 'win32':
168 # Need to remove existing file on Windows, else os.rename raises
169 # WindowsError or FileExistsError.
174 os
.rename(tf
.name
, fn
)
183 if sys
.version_info
>= (2, 7):
184 def find_xpath_attr(node
, xpath
, key
, val
=None):
185 """ Find the xpath xpath[@key=val] """
186 assert re
.match(r
'^[a-zA-Z_-]+$', key
)
187 expr
= xpath
+ ('[@%s]' % key
if val
is None else "[@%s='%s']" % (key
, val
))
188 return node
.find(expr
)
190 def find_xpath_attr(node
, xpath
, key
, val
=None):
191 for f
in node
.findall(compat_xpath(xpath
)):
192 if key
not in f
.attrib
:
194 if val
is None or f
.attrib
.get(key
) == val
:
198 # On python2.6 the xml.etree.ElementTree.Element methods don't support
199 # the namespace parameter
202 def xpath_with_ns(path
, ns_map
):
203 components
= [c
.split(':') for c
in path
.split('/')]
207 replaced
.append(c
[0])
210 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
211 return '/'.join(replaced
)
214 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
215 def _find_xpath(xpath
):
216 return node
.find(compat_xpath(xpath
))
218 if isinstance(xpath
, (str, compat_str
)):
219 n
= _find_xpath(xpath
)
227 if default
is not NO_DEFAULT
:
230 name
= xpath
if name
is None else name
231 raise ExtractorError('Could not find XML element %s' % name
)
237 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
238 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
239 if n
is None or n
== default
:
242 if default
is not NO_DEFAULT
:
245 name
= xpath
if name
is None else name
246 raise ExtractorError('Could not find XML element\'s text %s' % name
)
252 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
253 n
= find_xpath_attr(node
, xpath
, key
)
255 if default
is not NO_DEFAULT
:
258 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
259 raise ExtractorError('Could not find XML attribute %s' % name
)
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An id lookup is just an attribute lookup on the 'id' attribute.
    return get_element_by_attribute('id', id, html)
270 def get_element_by_attribute(attribute
, value
, html
):
271 """Return the content of the tag with the specified attribute in the passed HTML document"""
273 m
= re
.search(r
'''(?xs)
275 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
277 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
281 ''' % (re
.escape(attribute
), re
.escape(value
)), html
)
285 res
= m
.group('content')
287 if res
.startswith('"') or res
.startswith("'"):
290 return unescapeHTML(res
)
293 class HTMLAttributeParser(compat_HTMLParser
):
294 """Trivial HTML parser to gather the attributes for a single element"""
297 compat_HTMLParser
.__init
__(self
)
299 def handle_starttag(self
, tag
, attrs
):
300 self
.attrs
= dict(attrs
)
303 def extract_attributes(html_element
):
304 """Given a string for an HTML element such as
306 a="foo" B="bar" c="&98;az" d=boz
307 empty= noval entity="&"
310 Decode and return a dictionary of attributes.
312 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
313 'empty': '', 'noval': None, 'entity': '&',
314 'sq': '"', 'dq': '\''
316 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
317 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
319 parser
= HTMLAttributeParser()
320 parser
.feed(html_element
)
325 def clean_html(html
):
326 """Clean an HTML snippet into a readable string"""
328 if html
is None: # Convenience for sanitizing descriptions etc.
332 html
= html
.replace('\n', ' ')
333 html
= re
.sub(r
'\s*<\s*br\s*/?\s*>\s*', '\n', html
)
334 html
= re
.sub(r
'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html
)
336 html
= re
.sub('<.*?>', '', html
)
337 # Replace html entities
338 html
= unescapeHTML(html
)
342 def sanitize_open(filename
, open_mode
):
343 """Try to open the given filename, and slightly tweak it if this fails.
345 Attempts to open the given filename. If this fails, it tries to change
346 the filename slightly, step by step, until it's either able to open it
347 or it fails and raises a final exception, like the standard open()
350 It returns the tuple (stream, definitive_file_name).
354 if sys
.platform
== 'win32':
356 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
357 return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
)
358 stream
= open(encodeFilename(filename
), open_mode
)
359 return (stream
, filename
)
360 except (IOError, OSError) as err
:
361 if err
.errno
in (errno
.EACCES
,):
364 # In case of error, try to remove win32 forbidden chars
365 alt_filename
= sanitize_path(filename
)
366 if alt_filename
== filename
:
369 # An exception here should be caught in the caller
370 stream
= open(encodeFilename(alt_filename
), open_mode
)
371 return (stream
, alt_filename
)
374 def timeconvert(timestr
):
375 """Convert RFC 2822 defined time string into system timestamp"""
377 timetuple
= email
.utils
.parsedate_tz(timestr
)
378 if timetuple
is not None:
379 timestamp
= email
.utils
.mktime_tz(timetuple
)
383 def sanitize_filename(s
, restricted
=False, is_id
=False):
384 """Sanitizes a string so it could be used as part of a filename.
385 If restricted is set, use a stricter subset of allowed characters.
386 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
388 def replace_insane(char
):
389 if restricted
and char
in ACCENT_CHARS
:
390 return ACCENT_CHARS
[char
]
391 if char
== '?' or ord(char
) < 32 or ord(char
) == 127:
394 return '' if restricted
else '\''
396 return '_-' if restricted
else ' -'
397 elif char
in '\\/|*<>':
399 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace()):
401 if restricted
and ord(char
) > 127:
406 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
407 result
= ''.join(map(replace_insane
, s
))
409 while '__' in result
:
410 result
= result
.replace('__', '_')
411 result
= result
.strip('_')
412 # Common case of "Foreign band name - English song title"
413 if restricted
and result
.startswith('-_'):
415 if result
.startswith('-'):
416 result
= '_' + result
[len('-'):]
417 result
= result
.lstrip('.')
423 def sanitize_path(s
):
424 """Sanitizes and normalizes path on Windows"""
425 if sys
.platform
!= 'win32':
427 drive_or_unc
, _
= os
.path
.splitdrive(s
)
428 if sys
.version_info
< (2, 7) and not drive_or_unc
:
429 drive_or_unc
, _
= os
.path
.splitunc(s
)
430 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
434 path_part
if path_part
in ['.', '..'] else re
.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part
)
435 for path_part
in norm_path
]
437 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
438 return os
.path
.join(*sanitized_path
)
def sanitize_url(url):
    """Prepend protocol-less (protocol-relative) URLs with the `http:` scheme.

    This mitigates the number of unwanted failures due to a missing protocol;
    URLs that already carry a scheme are returned unchanged.
    """
    if url.startswith('//'):
        return 'http:%s' % url
    return url
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request, sanitizing the URL first."""
    fixed_url = sanitize_url(url)
    return compat_urllib_request.Request(fixed_url, *args, **kwargs)
451 def orderedSet(iterable
):
452 """ Remove all duplicates from the input iterable """
460 def _htmlentity_transform(entity_with_semicolon
):
461 """Transforms an HTML entity to a character."""
462 entity
= entity_with_semicolon
[:-1]
464 # Known non-numeric HTML entity
465 if entity
in compat_html_entities
.name2codepoint
:
466 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
468 # TODO: HTML5 allows entities without a semicolon. For example,
469 # 'Éric' should be decoded as 'Éric'.
470 if entity_with_semicolon
in compat_html_entities_html5
:
471 return compat_html_entities_html5
[entity_with_semicolon
]
473 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
475 numstr
= mobj
.group(1)
476 if numstr
.startswith('x'):
478 numstr
= '0%s' % numstr
481 # See https://github.com/rg3/youtube-dl/issues/7518
483 return compat_chr(int(numstr
, base
))
487 # Unknown entity in name, return its literal representation
488 return '&%s;' % entity
494 assert type(s
) == compat_str
497 r
'&([^;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
500 def get_subprocess_encoding():
501 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
502 # For subprocess calls, encode with locale encoding
503 # Refer to http://stackoverflow.com/a/9951851/35070
504 encoding
= preferredencoding()
506 encoding
= sys
.getfilesystemencoding()
512 def encodeFilename(s
, for_subprocess
=False):
514 @param s The name of the file
517 assert type(s
) == compat_str
519 # Python 3 has a Unicode API
520 if sys
.version_info
>= (3, 0):
523 # Pass '' directly to use Unicode APIs on Windows 2000 and up
524 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
525 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
526 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
529 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
530 if sys
.platform
.startswith('java'):
533 return s
.encode(get_subprocess_encoding(), 'ignore')
536 def decodeFilename(b
, for_subprocess
=False):
538 if sys
.version_info
>= (3, 0):
541 if not isinstance(b
, bytes):
544 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a single external-command argument via encodeFilename(..., True)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code paths still hand over byte strings; promote them to text.
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
def decodeArgument(b):
    """Decode a single external-command argument (delegates to decodeFilename
    with for_subprocess=True)."""
    return decodeFilename(b, True)
560 def decodeOption(optval
):
563 if isinstance(optval
, bytes):
564 optval
= optval
.decode(preferredencoding())
566 assert isinstance(optval
, compat_str
)
570 def formatSeconds(secs
):
572 return '%d:%02d:%02d' % (secs
// 3600, (secs
% 3600) // 60, secs
% 60)
574 return '%d:%02d' % (secs
// 60, secs
% 60)
579 def make_HTTPS_handler(params
, **kwargs
):
580 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
581 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
582 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
583 if opts_no_check_certificate
:
584 context
.check_hostname
= False
585 context
.verify_mode
= ssl
.CERT_NONE
587 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
590 # (create_default_context present but HTTPSHandler has no context=)
593 if sys
.version_info
< (3, 2):
594 return YoutubeDLHTTPSHandler(params
, **kwargs
)
596 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
597 context
.verify_mode
= (ssl
.CERT_NONE
598 if opts_no_check_certificate
599 else ssl
.CERT_REQUIRED
)
600 context
.set_default_verify_paths()
601 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
604 def bug_reports_message():
605 if ytdl_is_updateable():
606 update_cmd
= 'type youtube-dl -U to update'
608 update_cmd
= 'see https://yt-dl.org/update on how to update'
609 msg
= '; please report this issue on https://yt-dl.org/bug .'
610 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
611 msg
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
615 class ExtractorError(Exception):
616 """Error during info extraction."""
618 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
619 """ tb, if given, is the original traceback (so that it can be printed out).
620 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
623 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
625 if video_id
is not None:
626 msg
= video_id
+ ': ' + msg
628 msg
+= ' (caused by %r)' % cause
630 msg
+= bug_reports_message()
631 super(ExtractorError
, self
).__init
__(msg
)
634 self
.exc_info
= sys
.exc_info() # preserve original exception
636 self
.video_id
= video_id
638 def format_traceback(self
):
639 if self
.traceback
is None:
641 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised for URLs that no extractor accepts.

    expected=True marks this as a normal error condition rather than a
    youtube-dl bug (see ExtractorError).
    """
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
651 class RegexNotFoundError(ExtractorError
):
652 """Error when a regex didn't match"""
656 class DownloadError(Exception):
657 """Download Error exception.
659 This exception may be thrown by FileDownloader objects if they are not
660 configured to continue on errors. They will contain the appropriate
664 def __init__(self
, msg
, exc_info
=None):
665 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
666 super(DownloadError
, self
).__init
__(msg
)
667 self
.exc_info
= exc_info
670 class SameFileError(Exception):
671 """Same File exception.
673 This exception will be thrown by FileDownloader objects if they detect
674 multiple files would have to be downloaded to the same file on disk.
679 class PostProcessingError(Exception):
680 """Post Processing exception.
682 This exception may be raised by PostProcessor's .run() method to
683 indicate an error in the postprocessing task.
686 def __init__(self
, msg
):
690 class MaxDownloadsReached(Exception):
691 """ --max-downloads limit has been reached. """
695 class UnavailableVideoError(Exception):
696 """Unavailable Format exception.
698 This exception will be thrown when a video is requested
699 in a format that is not available for that video.
704 class ContentTooShortError(Exception):
705 """Content Too Short exception.
707 This exception may be raised by FileDownloader objects when a file they
708 download is too small for what the server announced first, indicating
709 the connection was probably interrupted.
712 def __init__(self
, downloaded
, expected
):
714 self
.downloaded
= downloaded
715 self
.expected
= expected
718 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
719 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
720 # expected HTTP responses to meet HTTP/1.0 or later (see also
721 # https://github.com/rg3/youtube-dl/issues/6727)
722 if sys
.version_info
< (3, 0):
723 kwargs
[b
'strict'] = True
724 hc
= http_class(*args
, **kwargs
)
725 source_address
= ydl_handler
._params
.get('source_address')
726 if source_address
is not None:
727 sa
= (source_address
, 0)
728 if hasattr(hc
, 'source_address'): # Python 2.7+
729 hc
.source_address
= sa
731 def _hc_connect(self
, *args
, **kwargs
):
732 sock
= compat_socket_create_connection(
733 (self
.host
, self
.port
), self
.timeout
, sa
)
735 self
.sock
= ssl
.wrap_socket(
736 sock
, self
.key_file
, self
.cert_file
,
737 ssl_version
=ssl
.PROTOCOL_TLSv1
)
740 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Consume youtube-dl's internal pseudo-headers.

    When the private 'Youtubedl-no-compression' marker is present, drop any
    'Accept-Encoding' header (matched case-insensitively) and remove the
    marker itself; otherwise return the mapping unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    cleaned = dict((name, value) for name, value in headers.items()
                   if name.lower() != 'accept-encoding')
    del cleaned['Youtubedl-no-compression']
    return cleaned
755 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
756 """Handler for HTTP requests and responses.
758 This class, when installed with an OpenerDirector, automatically adds
759 the standard headers to every HTTP request and handles gzipped and
760 deflated responses from web servers. If compression is to be avoided in
761 a particular request, the original request in the program code only has
762 to include the HTTP header "Youtubedl-no-compression", which will be
763 removed before making the real request.
765 Part of this code was copied from:
767 http://techknack.net/python-urllib2-handlers/
769 Andrew Rowls, the author of that code, agreed to release it to the
    def __init__(self, params, *args, **kwargs):
        # Forward everything except the youtube-dl options dict to the base
        # HTTPHandler; keep the options around for later request handling.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
777 def http_open(self
, req
):
778 conn_class
= compat_http_client
.HTTPConnection
780 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
782 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
783 del req
.headers
['Ytdl-socks-proxy']
785 return self
.do_open(functools
.partial(
786 _create_http_connection
, self
, conn_class
, False),
792 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
794 return zlib
.decompress(data
)
797 def addinfourl_wrapper(stream
, headers
, url
, code
):
798 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
799 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
800 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
804 def http_request(self
, req
):
805 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
806 # always respected by websites, some tend to give out URLs with non percent-encoded
807 # non-ASCII characters (see telemb.py, ard.py [#3412])
808 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
809 # To work around aforementioned issue we will replace request's original URL with
810 # percent-encoded one
811 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
812 # the code of this workaround has been moved here from YoutubeDL.urlopen()
813 url
= req
.get_full_url()
814 url_escaped
= escape_url(url
)
816 # Substitute URL if any change after escaping
817 if url
!= url_escaped
:
818 req
= update_Request(req
, url
=url_escaped
)
820 for h
, v
in std_headers
.items():
821 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
822 # The dict keys are capitalized because of this bug by urllib
823 if h
.capitalize() not in req
.headers
:
826 req
.headers
= handle_youtubedl_headers(req
.headers
)
828 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
829 # Python 2.6 is brain-dead when it comes to fragments
830 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
831 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
835 def http_response(self
, req
, resp
):
838 if resp
.headers
.get('Content-encoding', '') == 'gzip':
839 content
= resp
.read()
840 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
842 uncompressed
= io
.BytesIO(gz
.read())
843 except IOError as original_ioerror
:
844 # There may be junk add the end of the file
845 # See http://stackoverflow.com/q/4928560/35070 for details
846 for i
in range(1, 1024):
848 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
849 uncompressed
= io
.BytesIO(gz
.read())
854 raise original_ioerror
855 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
856 resp
.msg
= old_resp
.msg
857 del resp
.headers
['Content-encoding']
859 if resp
.headers
.get('Content-encoding', '') == 'deflate':
860 gz
= io
.BytesIO(self
.deflate(resp
.read()))
861 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
862 resp
.msg
= old_resp
.msg
863 del resp
.headers
['Content-encoding']
864 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
865 # https://github.com/rg3/youtube-dl/issues/6457).
866 if 300 <= resp
.code
< 400:
867 location
= resp
.headers
.get('Location')
869 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
870 if sys
.version_info
>= (3, 0):
871 location
= location
.encode('iso-8859-1').decode('utf-8')
873 location
= location
.decode('utf-8')
874 location_escaped
= escape_url(location
)
875 if location
!= location_escaped
:
876 del resp
.headers
['Location']
877 if sys
.version_info
< (3, 0):
878 location_escaped
= location_escaped
.encode('utf-8')
879 resp
.headers
['Location'] = location_escaped
882 https_request
= http_request
883 https_response
= http_response
886 def make_socks_conn_class(base_class
, socks_proxy
):
887 assert issubclass(base_class
, (
888 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
890 url_components
= compat_urlparse
.urlparse(socks_proxy
)
891 if url_components
.scheme
.lower() == 'socks5':
892 socks_type
= ProxyType
.SOCKS5
893 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
894 socks_type
= ProxyType
.SOCKS4
895 elif url_components
.scheme
.lower() == 'socks4a':
896 socks_type
= ProxyType
.SOCKS4A
898 def unquote_if_non_empty(s
):
901 return compat_urllib_parse_unquote_plus(s
)
905 url_components
.hostname
, url_components
.port
or 1080,
907 unquote_if_non_empty(url_components
.username
),
908 unquote_if_non_empty(url_components
.password
),
911 class SocksConnection(base_class
):
913 self
.sock
= sockssocket()
914 self
.sock
.setproxy(*proxy_args
)
915 if type(self
.timeout
) in (int, float):
916 self
.sock
.settimeout(self
.timeout
)
917 self
.sock
.connect((self
.host
, self
.port
))
919 if isinstance(self
, compat_http_client
.HTTPSConnection
):
920 if hasattr(self
, '_context'): # Python > 2.6
921 self
.sock
= self
._context
.wrap_socket(
922 self
.sock
, server_hostname
=self
.host
)
924 self
.sock
= ssl
.wrap_socket(self
.sock
)
926 return SocksConnection
929 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        # Initialize the base HTTPSHandler, then record which connection class
        # to use for outgoing HTTPS connections (defaults to the stock
        # HTTPSConnection) and keep the youtube-dl options dict for later use.
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params
935 def https_open(self
, req
):
937 conn_class
= self
._https
_conn
_class
939 if hasattr(self
, '_context'): # python > 2.6
940 kwargs
['context'] = self
._context
941 if hasattr(self
, '_check_hostname'): # python 3.x
942 kwargs
['check_hostname'] = self
._check
_hostname
944 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
946 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
947 del req
.headers
['Ytdl-socks-proxy']
949 return self
.do_open(functools
.partial(
950 _create_http_connection
, self
, conn_class
, True),
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that also wires cookie handling into HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #         if set_cookie != set_cookie_escaped:
        #             del response.headers[set_cookie_header]
        #             response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS as well.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
978 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
979 """ Return a UNIX timestamp from the given date """
984 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
988 r
'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
991 timezone
= datetime
.timedelta()
993 date_str
= date_str
[:-len(m
.group(0))]
994 if not m
.group('sign'):
995 timezone
= datetime
.timedelta()
997 sign
= 1 if m
.group('sign') == '+' else -1
998 timezone
= datetime
.timedelta(
999 hours
=sign
* int(m
.group('hours')),
1000 minutes
=sign
* int(m
.group('minutes')))
1002 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
1003 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
1004 return calendar
.timegm(dt
.timetuple())
1009 def unified_strdate(date_str
, day_first
=True):
1010 """Return a string with the date in the format YYYYMMDD"""
1012 if date_str
is None:
1016 date_str
= date_str
.replace(',', ' ')
1017 # %z (UTC offset) is only supported in python>=3.2
1018 if not re
.match(r
'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str
):
1019 date_str
= re
.sub(r
' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str
)
1020 # Remove AM/PM + timezone
1021 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
1023 format_expressions
= [
1034 '%Y/%m/%d %H:%M:%S',
1035 '%Y-%m-%d %H:%M:%S',
1036 '%Y-%m-%d %H:%M:%S.%f',
1039 '%Y-%m-%dT%H:%M:%SZ',
1040 '%Y-%m-%dT%H:%M:%S.%fZ',
1041 '%Y-%m-%dT%H:%M:%S.%f0Z',
1042 '%Y-%m-%dT%H:%M:%S',
1043 '%Y-%m-%dT%H:%M:%S.%f',
1047 format_expressions
.extend([
1053 '%d/%m/%Y %H:%M:%S',
1056 format_expressions
.extend([
1061 '%m/%d/%Y %H:%M:%S',
1063 for expression
in format_expressions
:
1065 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
1068 if upload_date
is None:
1069 timetuple
= email
.utils
.parsedate_tz(date_str
)
1072 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
1075 if upload_date
is not None:
1076 return compat_str(upload_date
)
1079 def determine_ext(url
, default_ext
='unknown_video'):
1082 guess
= url
.partition('?')[0].rpartition('.')[2]
1083 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
1085 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1086 elif guess
.rstrip('/') in KNOWN_EXTENSIONS
:
1087 return guess
.rstrip('/')
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive a subtitle file name from a media file name.

    The result is '<stem>.<sub_lang>.<sub_format>', where <stem> is the
    file name with everything after its last '.' stripped (the whole name
    if it has no '.').
    """
    stem = filename.rsplit('.', 1)[0]
    return '%s.%s.%s' % (stem, sub_lang, sub_format)
1096 def date_from_str(date_str
):
1098 Return a datetime object from a string in the format YYYYMMDD or
1099 (now|today)[+-][0-9](day|week|month|year)(s)?"""
1100 today
= datetime
.date
.today()
1101 if date_str
in ('now', 'today'):
1103 if date_str
== 'yesterday':
1104 return today
- datetime
.timedelta(days
=1)
1105 match
= re
.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
)
1106 if match
is not None:
1107 sign
= match
.group('sign')
1108 time
= int(match
.group('time'))
1111 unit
= match
.group('unit')
1112 # A bad approximation?
1116 elif unit
== 'year':
1120 delta
= datetime
.timedelta(**{unit
: time
})
1121 return today
+ delta
1122 return datetime
.datetime
.strptime(date_str
, '%Y%m%d').date()
1125 def hyphenate_date(date_str
):
1127 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1128 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
1129 if match
is not None:
1130 return '-'.join(match
.groups())
1135 class DateRange(object):
1136 """Represents a time interval between two dates"""
1138 def __init__(self
, start
=None, end
=None):
1139 """start and end must be strings in the format accepted by date"""
1140 if start
is not None:
1141 self
.start
= date_from_str(start
)
1143 self
.start
= datetime
.datetime
.min.date()
1145 self
.end
= date_from_str(end
)
1147 self
.end
= datetime
.datetime
.max.date()
1148 if self
.start
> self
.end
:
1149 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
1153 """Returns a range that only contains the given day"""
1154 return cls(day
, day
)
1156 def __contains__(self
, date
):
1157 """Check if the date is in the range"""
1158 if not isinstance(date
, datetime
.date
):
1159 date
= date_from_str(date
)
1160 return self
.start
<= date
<= self
.end
1163 return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat())
1166 def platform_name():
1167 """ Returns the platform name as a compat_str """
1168 res
= platform
.platform()
1169 if isinstance(res
, bytes):
1170 res
= res
.decode(preferredencoding())
1172 assert isinstance(res
, compat_str
)
1176 def _windows_write_string(s
, out
):
1177 """ Returns True if the string was written using special methods,
1178 False if it has yet to be written out."""
1179 # Adapted from http://stackoverflow.com/a/3259271/35070
1182 import ctypes
.wintypes
1190 fileno
= out
.fileno()
1191 except AttributeError:
1192 # If the output stream doesn't have a fileno, it's virtual
1194 except io
.UnsupportedOperation
:
1195 # Some strange Windows pseudo files?
1197 if fileno
not in WIN_OUTPUT_IDS
:
1200 GetStdHandle
= ctypes
.WINFUNCTYPE(
1201 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
1202 (b
'GetStdHandle', ctypes
.windll
.kernel32
))
1203 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
1205 WriteConsoleW
= ctypes
.WINFUNCTYPE(
1206 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
1207 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
1208 ctypes
.wintypes
.LPVOID
)((b
'WriteConsoleW', ctypes
.windll
.kernel32
))
1209 written
= ctypes
.wintypes
.DWORD(0)
1211 GetFileType
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
'GetFileType', ctypes
.windll
.kernel32
))
1212 FILE_TYPE_CHAR
= 0x0002
1213 FILE_TYPE_REMOTE
= 0x8000
1214 GetConsoleMode
= ctypes
.WINFUNCTYPE(
1215 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
1216 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
1217 (b
'GetConsoleMode', ctypes
.windll
.kernel32
))
1218 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
1220 def not_a_console(handle
):
1221 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
1223 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
or
1224 GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
1226 if not_a_console(h
):
1229 def next_nonbmp_pos(s
):
1231 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
1232 except StopIteration:
1236 count
= min(next_nonbmp_pos(s
), 1024)
1238 ret
= WriteConsoleW(
1239 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
1241 raise OSError('Failed to write string')
1242 if not count
: # We just wrote a non-BMP character
1243 assert written
.value
== 2
1246 assert written
.value
> 0
1247 s
= s
[written
.value
:]
1251 def write_string(s
, out
=None, encoding
=None):
1254 assert type(s
) == compat_str
1256 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
1257 if _windows_write_string(s
, out
):
1260 if ('b' in getattr(out
, 'mode', '') or
1261 sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
1262 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
1264 elif hasattr(out
, 'buffer'):
1265 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
1266 byt
= s
.encode(enc
, 'ignore')
1267 out
.buffer.write(byt
)
1273 def bytes_to_intlist(bs
):
1276 if isinstance(bs
[0], int): # Python 3
1279 return [ord(c
) for c
in bs
]
1282 def intlist_to_bytes(xs
):
1285 return compat_struct_pack('%dB' % len(xs
), *xs
)
1288 # Cross-platform file locking
1289 if sys
.platform
== 'win32':
1290 import ctypes
.wintypes
1293 class OVERLAPPED(ctypes
.Structure
):
1295 ('Internal', ctypes
.wintypes
.LPVOID
),
1296 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
1297 ('Offset', ctypes
.wintypes
.DWORD
),
1298 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
1299 ('hEvent', ctypes
.wintypes
.HANDLE
),
1302 kernel32
= ctypes
.windll
.kernel32
1303 LockFileEx
= kernel32
.LockFileEx
1304 LockFileEx
.argtypes
= [
1305 ctypes
.wintypes
.HANDLE
, # hFile
1306 ctypes
.wintypes
.DWORD
, # dwFlags
1307 ctypes
.wintypes
.DWORD
, # dwReserved
1308 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1309 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1310 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1312 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
1313 UnlockFileEx
= kernel32
.UnlockFileEx
1314 UnlockFileEx
.argtypes
= [
1315 ctypes
.wintypes
.HANDLE
, # hFile
1316 ctypes
.wintypes
.DWORD
, # dwReserved
1317 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1318 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1319 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1321 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
1322 whole_low
= 0xffffffff
1323 whole_high
= 0x7fffffff
1325 def _lock_file(f
, exclusive
):
1326 overlapped
= OVERLAPPED()
1327 overlapped
.Offset
= 0
1328 overlapped
.OffsetHigh
= 0
1329 overlapped
.hEvent
= 0
1330 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
1331 handle
= msvcrt
.get_osfhandle(f
.fileno())
1332 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
1333 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1334 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
1336 def _unlock_file(f
):
1337 assert f
._lock
_file
_overlapped
_p
1338 handle
= msvcrt
.get_osfhandle(f
.fileno())
1339 if not UnlockFileEx(handle
, 0,
1340 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1341 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
1344 # Some platforms, such as Jython, is missing fcntl
1348 def _lock_file(f
, exclusive
):
1349 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
1351 def _unlock_file(f
):
1352 fcntl
.flock(f
, fcntl
.LOCK_UN
)
1354 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
1356 def _lock_file(f
, exclusive
):
1357 raise IOError(UNSUPPORTED_MSG
)
1359 def _unlock_file(f
):
1360 raise IOError(UNSUPPORTED_MSG
)
1363 class locked_file(object):
1364 def __init__(self
, filename
, mode
, encoding
=None):
1365 assert mode
in ['r', 'a', 'w']
1366 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
1369 def __enter__(self
):
1370 exclusive
= self
.mode
!= 'r'
1372 _lock_file(self
.f
, exclusive
)
1378 def __exit__(self
, etype
, value
, traceback
):
1380 _unlock_file(self
.f
)
1387 def write(self
, *args
):
1388 return self
.f
.write(*args
)
1390 def read(self
, *args
):
1391 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), falling back to 'utf-8' when unset."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
1399 def shell_quote(args
):
1401 encoding
= get_filesystem_encoding()
1403 if isinstance(a
, bytes):
1404 # We may get a filename encoded with 'encodeFilename'
1405 a
= a
.decode(encoding
)
1406 quoted_args
.append(pipes
.quote(a
))
1407 return ' '.join(quoted_args
)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The data rides in the fragment, JSON-encoded inside a query string,
    # so ordinary URL handling leaves it untouched.
    payload = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, payload)
def unsmuggle_url(smug_url, default=None):
    """Undo smuggle_url(): return (url, data), or (url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, payload = smug_url.rpartition('#')
    serialized = compat_parse_qs(payload)['__youtubedl_smuggle'][0]
    return url, json.loads(serialized)
def format_bytes(bytes):
    """Render a byte count as a human-readable string such as '1.00KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log(0) is undefined, so zero maps straight to the 'B' bucket.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    converted = float(bytes) / float(1024 ** exponent)
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse a quantity like '1.5MiB' out of *s*; multipliers come from *unit_table*.

    Returns the integer byte/count value, or None when *s* does not start
    with a number followed by a known unit.
    """
    alternatives = '|'.join(re.escape(unit) for unit in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if match is None:
        return None
    # Accept a comma as the decimal separator (e.g. '1,5').
    number = float(match.group('num').replace(',', '.'))
    return int(number * unit_table[match.group('unit')])
1452 def parse_filesize(s
):
1456 # The lower-case forms are of course incorrect and unofficial,
1457 # but we support those too
1495 return lookup_unit_table(_UNIT_TABLE
, s
)
1504 if re
.match(r
'^[\d,.]+$', s
):
1505 return str_to_int(s
)
1516 return lookup_unit_table(_UNIT_TABLE
, s
)
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    if name not in ENGLISH_MONTH_NAMES:
        return None
    return ENGLISH_MONTH_NAMES.index(name) + 1
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviation (e.g. 'Jan' -> 1), or None when unknown """
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
1538 def fix_xml_ampersands(xml_str
):
1539 """Replace all the '&' by '&' in XML"""
1541 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1546 def setproctitle(title
):
1547 assert isinstance(title
, compat_str
)
1549 # ctypes in Jython is not complete
1550 # http://bugs.jython.org/issue2148
1551 if sys
.platform
.startswith('java'):
1555 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
1558 title_bytes
= title
.encode('utf-8')
1559 buf
= ctypes
.create_string_buffer(len(title_bytes
))
1560 buf
.value
= title_bytes
1562 libc
.prctl(15, buf
, 0, 0, 0)
1563 except AttributeError:
1564 return # Strange libc, just skip this
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present; None passes through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Strip *end* from the end of *s* when present; None passes through.

    Guards against end == '': the original expression s[:-len(end)] would
    evaluate to s[:0] and wrongly return '' for every input.
    """
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
def remove_quotes(s):
    """Drop one matching pair of surrounding single or double quotes, if any."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded)."""
    parsed_path = compat_urlparse.urlparse(url).path
    return parsed_path.strip('/').split('/')[-1]
1589 class HEADRequest(compat_urllib_request
.Request
):
1590 def get_method(self
):
1594 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
1597 v
= getattr(v
, get_attr
, None)
1603 return int(v
) * invscale
// scale
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, or return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
1612 def str_to_int(int_str
):
1613 """ A more relaxed version of int_or_none """
1616 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
1620 def float_or_none(v
, scale
=1, invscale
=1, default
=None):
1624 return float(v
) * invscale
/ scale
1629 def parse_duration(s
):
1630 if not isinstance(s
, compat_basestring
):
1635 days
, hours
, mins
, secs
, ms
= [None] * 5
1636 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s
)
1638 days
, hours
, mins
, secs
, ms
= m
.groups()
1643 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
1646 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
1649 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
1652 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
1655 days
, hours
, mins
, secs
, ms
= m
.groups()
1657 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s
)
1659 hours
, mins
= m
.groups()
1665 duration
+= float(secs
)
1667 duration
+= float(mins
) * 60
1669 duration
+= float(hours
) * 60 * 60
1671 duration
+= float(days
) * 24 * 60 * 60
1673 duration
+= float(ms
)
1677 def prepend_extension(filename
, ext
, expected_real_ext
=None):
1678 name
, real_ext
= os
.path
.splitext(filename
)
1680 '{0}.{1}{2}'.format(name
, ext
, real_ext
)
1681 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
1682 else '{0}.{1}'.format(filename
, ext
))
1685 def replace_extension(filename
, ext
, expected_real_ext
=None):
1686 name
, real_ext
= os
.path
.splitext(filename
)
1687 return '{0}.{1}'.format(
1688 name
if not expected_real_ext
or real_ext
[1:] == expected_real_ext
else filename
,
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # Default changed from a shared mutable [] to None (same behavior for
    # all callers; avoids the mutable-default-argument pitfall).
    if args is None:
        args = []
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        output, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Pull the version number out of an executable's *output*.

    Falls back to the generic 'version <token>' pattern; returns
    *unrecognized* when nothing matches.
    """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    match = re.search(version_re, output)
    return match.group(1) if match else unrecognized
1728 class PagedList(object):
1730 # This is only useful for tests
1731 return len(self
.getslice())
1734 class OnDemandPagedList(PagedList
):
1735 def __init__(self
, pagefunc
, pagesize
, use_cache
=False):
1736 self
._pagefunc
= pagefunc
1737 self
._pagesize
= pagesize
1738 self
._use
_cache
= use_cache
1742 def getslice(self
, start
=0, end
=None):
1744 for pagenum
in itertools
.count(start
// self
._pagesize
):
1745 firstid
= pagenum
* self
._pagesize
1746 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
1747 if start
>= nextfirstid
:
1752 page_results
= self
._cache
.get(pagenum
)
1753 if page_results
is None:
1754 page_results
= list(self
._pagefunc
(pagenum
))
1756 self
._cache
[pagenum
] = page_results
1759 start
% self
._pagesize
1760 if firstid
<= start
< nextfirstid
1764 ((end
- 1) % self
._pagesize
) + 1
1765 if (end
is not None and firstid
<= end
<= nextfirstid
)
1768 if startv
!= 0 or endv
is not None:
1769 page_results
= page_results
[startv
:endv
]
1770 res
.extend(page_results
)
1772 # A little optimization - if current page is not "full", ie. does
1773 # not contain page_size videos then we can assume that this page
1774 # is the last one - there are no more ids on further pages -
1775 # i.e. no need to query again.
1776 if len(page_results
) + startv
< self
._pagesize
:
1779 # If we got the whole page, but the next page is not interesting,
1780 # break out early as well
1781 if end
== nextfirstid
:
1786 class InAdvancePagedList(PagedList
):
1787 def __init__(self
, pagefunc
, pagecount
, pagesize
):
1788 self
._pagefunc
= pagefunc
1789 self
._pagecount
= pagecount
1790 self
._pagesize
= pagesize
1792 def getslice(self
, start
=0, end
=None):
1794 start_page
= start
// self
._pagesize
1796 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
1797 skip_elems
= start
- start_page
* self
._pagesize
1798 only_more
= None if end
is None else end
- start
1799 for pagenum
in range(start_page
, end_page
):
1800 page
= list(self
._pagefunc
(pagenum
))
1802 page
= page
[skip_elems
:]
1804 if only_more
is not None:
1805 if len(page
) < only_more
:
1806 only_more
-= len(page
)
1808 page
= page
[:only_more
]
1815 def uppercase_escape(s
):
1816 unicode_escape
= codecs
.getdecoder('unicode_escape')
1818 r
'\\U[0-9a-fA-F]{8}',
1819 lambda m
: unicode_escape(m
.group(0))[0],
1823 def lowercase_escape(s
):
1824 unicode_escape
= codecs
.getdecoder('unicode_escape')
1826 r
'\\u[0-9a-fA-F]{4}',
1827 lambda m
: unicode_escape(m
.group(0))[0],
1831 def escape_rfc3986(s
):
1832 """Escape non-ASCII characters as suggested by RFC 3986"""
1833 if sys
.version_info
< (3, 0) and isinstance(s
, compat_str
):
1834 s
= s
.encode('utf-8')
1835 return compat_urllib_parse
.quote(s
, b
"%/;:@&=+$,!~*'()?#[]")
1838 def escape_url(url
):
1839 """Escape URL as suggested by RFC 3986"""
1840 url_parsed
= compat_urllib_parse_urlparse(url
)
1841 return url_parsed
._replace
(
1842 netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'),
1843 path
=escape_rfc3986(url_parsed
.path
),
1844 params
=escape_rfc3986(url_parsed
.params
),
1845 query
=escape_rfc3986(url_parsed
.query
),
1846 fragment
=escape_rfc3986(url_parsed
.fragment
)
1850 def read_batch_urls(batch_fd
):
1852 if not isinstance(url
, compat_str
):
1853 url
= url
.decode('utf-8', 'replace')
1854 BOM_UTF8
= '\xef\xbb\xbf'
1855 if url
.startswith(BOM_UTF8
):
1856 url
= url
[len(BOM_UTF8
):]
1858 if url
.startswith(('#', ';', ']')):
1862 with contextlib
.closing(batch_fd
) as fd
:
1863 return [url
for url
in map(fixup
, fd
) if url
]
1866 def urlencode_postdata(*args
, **kargs
):
1867 return compat_urllib_parse_urlencode(*args
, **kargs
).encode('ascii')
1870 def update_url_query(url
, query
):
1873 parsed_url
= compat_urlparse
.urlparse(url
)
1874 qs
= compat_parse_qs(parsed_url
.query
)
1876 return compat_urlparse
.urlunparse(parsed_url
._replace
(
1877 query
=compat_urllib_parse_urlencode(qs
, True)))
1880 def update_Request(req
, url
=None, data
=None, headers
={}, query
={}):
1881 req_headers
= req
.headers
.copy()
1882 req_headers
.update(headers
)
1883 req_data
= data
or req
.data
1884 req_url
= update_url_query(url
or req
.get_full_url(), query
)
1885 req_type
= HEADRequest
if req
.get_method() == 'HEAD' else compat_urllib_request
.Request
1887 req_url
, data
=req_data
, headers
=req_headers
,
1888 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
1889 if hasattr(req
, 'timeout'):
1890 new_req
.timeout
= req
.timeout
1894 def dict_get(d
, key_or_keys
, default
=None, skip_false_values
=True):
1895 if isinstance(key_or_keys
, (list, tuple)):
1896 for key
in key_or_keys
:
1897 if key
not in d
or d
[key
] is None or skip_false_values
and not d
[key
]:
1901 return d
.get(key_or_keys
, default
)
1904 def try_get(src
, getter
, expected_type
=None):
1907 except (AttributeError, KeyError, TypeError, IndexError):
1910 if expected_type
is None or isinstance(v
, expected_type
):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* unchanged if already a compat_str, else decode it.

    NOTE: the *encoding* default is evaluated once, at definition time.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
1927 def parse_age_limit(s
):
1930 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
1931 return int(m
.group('age')) if m
else US_RATINGS
.get(s
)
1934 def strip_jsonp(code
):
1936 r
'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r
'\1', code
)
1939 def js_to_json(code
):
1942 if v
in ('true', 'false', 'null'):
1944 elif v
.startswith('/*') or v
== ',':
1947 if v
[0] in ("'", '"'):
1948 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
1953 }.get(m
.group(0), m
.group(0)), v
[1:-1])
1956 (r
'^0[xX][0-9a-fA-F]+', 16),
1960 for regex
, base
in INTEGER_TABLE
:
1961 im
= re
.match(regex
, v
)
1963 i
= int(im
.group(0), base
)
1964 return '"%d":' % i
if v
.endswith(':') else '%d' % i
1968 return re
.sub(r
'''(?sx)
1969 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
1970 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
1971 /\*.*?\*/|,(?=\s*[\]}])|
1972 [a-zA-Z_][.a-zA-Z_0-9]*|
1973 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
1978 def qualities(quality_ids
):
1979 """ Get a numeric quality value out of a list of possible values """
1982 return quality_ids
.index(qid
)
1988 DEFAULT_OUTTMPL
= '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; unparsable/empty input yields not assume_new."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    # -U only works when running from the zip bundle or a frozen executable.
    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return loaded_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    """Render a subprocess argument list as one shell-quoted display string."""
    return ' '.join(map(compat_shlex_quote, args))
def error_to_compat_str(err):
    """Stringify an exception safely across Python versions."""
    text = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        text = text.decode(preferredencoding())
    return text
2035 def mimetype2ext(mt
):
2041 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
2042 # it's the most popular one
2043 'audio/mpeg': 'mp3',
2048 _
, _
, res
= mt
.rpartition('/')
2052 'smptett+xml': 'tt',
2058 'x-mp4-fragmented': 'mp4',
2063 def urlhandle_detect_ext(url_handle
):
2064 getheader
= url_handle
.headers
.get
2066 cd
= getheader('Content-Disposition')
2068 m
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
)
2070 e
= determine_ext(m
.group('filename'), default_ext
=None)
2074 return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 'data:' URI embedding *data* as base64."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer limit configured, or content open to everyone: never block.
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    A recognized BOM selects the text decoding; otherwise UTF-8 with
    replacement is assumed. Returns a truthy match object when the decoded
    text begins with '<' (possibly after whitespace), else None.
    """
    bom_encodings = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, encoding in bom_encodings:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')
    return re.match(r'^\s*<', text)
def determine_protocol(info_dict):
    """Infer the download protocol for *info_dict*.

    An explicit 'protocol' key wins; otherwise the URL scheme prefix and the
    file extension are inspected, falling back to the parsed URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    widths = [max(len(compat_str(cell)) for cell in column)
              for column in zip(*rows)]
    # Left-justify every column except the last, which is left ragged.
    line_fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(line_fmt % tuple(row) for row in rows)
2141 def _match_one(filter_part
, dct
):
2142 COMPARISON_OPERATORS
= {
2150 operator_rex
= re
.compile(r
'''(?x)\s*
2152 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2154 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
2155 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
2158 ''' % '|'.join(map(re
.escape
, COMPARISON_OPERATORS
.keys())))
2159 m
= operator_rex
.search(filter_part
)
2161 op
= COMPARISON_OPERATORS
[m
.group('op')]
2162 if m
.group('strval') is not None:
2163 if m
.group('op') not in ('=', '!='):
2165 'Operator %s does not support string values!' % m
.group('op'))
2166 comparison_value
= m
.group('strval')
2169 comparison_value
= int(m
.group('intval'))
2171 comparison_value
= parse_filesize(m
.group('intval'))
2172 if comparison_value
is None:
2173 comparison_value
= parse_filesize(m
.group('intval') + 'B')
2174 if comparison_value
is None:
2176 'Invalid integer value %r in filter part %r' % (
2177 m
.group('intval'), filter_part
))
2178 actual_value
= dct
.get(m
.group('key'))
2179 if actual_value
is None:
2180 return m
.group('none_inclusive')
2181 return op(actual_value
, comparison_value
)
2184 '': lambda v
: v
is not None,
2185 '!': lambda v
: v
is None,
2187 operator_rex
= re
.compile(r
'''(?x)\s*
2188 (?P<op>%s)\s*(?P<key>[a-z_]+)
2190 ''' % '|'.join(map(re
.escape
, UNARY_OPERATORS
.keys())))
2191 m
= operator_rex
.search(filter_part
)
2193 op
= UNARY_OPERATORS
[m
.group('op')]
2194 actual_value
= dct
.get(m
.group('key'))
2195 return op(actual_value
)
2197 raise ValueError('Invalid filter part %r' % filter_part
)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    clauses = filter_str.split('&')
    return all(_match_one(clause, dct) for clause in clauses)
def match_filter_func(filter_str):
    """Wrap match_str() into the callable shape expected by YoutubeDL.

    The returned function yields None when the video passes the filter, or a
    human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally suffixed with 's': '12', '12.5s'
    plain = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if plain:
        return float(plain.group('time_offset'))

    # Clock form 'HH:MM:SS', fractions separated by '.' or ':'
    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        return (3600 * int(clock.group(1))
                + 60 * int(clock.group(2))
                + float(clock.group(3).replace(':', '.')))
def srt_subtitles_timecode(seconds):
    """Format a second count as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates the float components, so no explicit int() is needed.
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
2234 def dfxp2srt(dfxp_data
):
2235 _x
= functools
.partial(xpath_with_ns
, ns_map
={
2236 'ttml': 'http://www.w3.org/ns/ttml',
2237 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
2238 'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
2241 class TTMLPElementParser(object):
2244 def start(self
, tag
, attrib
):
2245 if tag
in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
2251 def data(self
, data
):
2255 return self
.out
.strip()
2257 def parse_node(node
):
2258 target
= TTMLPElementParser()
2259 parser
= xml
.etree
.ElementTree
.XMLParser(target
=target
)
2260 parser
.feed(xml
.etree
.ElementTree
.tostring(node
))
2261 return parser
.close()
2263 dfxp
= compat_etree_fromstring(dfxp_data
.encode('utf-8'))
2265 paras
= dfxp
.findall(_x('.//ttml:p')) or dfxp
.findall(_x('.//ttaf1:p')) or dfxp
.findall(_x('.//ttaf1_0604:p')) or dfxp
.findall('.//p')
2268 raise ValueError('Invalid dfxp/TTML subtitle')
2270 for para
, index
in zip(paras
, itertools
.count(1)):
2271 begin_time
= parse_dfxp_time_expr(para
.attrib
.get('begin'))
2272 end_time
= parse_dfxp_time_expr(para
.attrib
.get('end'))
2273 dur
= parse_dfxp_time_expr(para
.attrib
.get('dur'))
2274 if begin_time
is None:
2279 end_time
= begin_time
+ dur
2280 out
.append('%d\n%s --> %s\n%s\n\n' % (
2282 srt_subtitles_timecode(begin_time
),
2283 srt_subtitles_timecode(end_time
),
def cli_option(params, command_option, param):
    """Emit [command_option, value] when params[param] is set, else []."""
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Emit a boolean CLI flag; joined into one token when *separator* is given."""
    flag = params.get(param)
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when params[param] equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
2307 def cli_configuration_args(params
, param
, default
=[]):
2308 ex_args
= params
.get(param
)
2311 assert isinstance(ex_args
, list)
2315 class ISO639Utils(object):
2316 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
2505 def short2long(cls
, code
):
2506 """Convert language code from ISO 639-1 to ISO 639-2/T"""
2507 return cls
._lang
_map
.get(code
[:2])
2510 def long2short(cls
, code
):
2511 """Convert language code from ISO 639-2/T to ISO 639-1"""
2512 for short_name
, long_name
in cls
._lang
_map
.items():
2513 if long_name
== code
:
2517 class ISO3166Utils(object):
2518 # From http://data.okfn.org/data/core/country-list
2520 'AF': 'Afghanistan',
2521 'AX': 'Åland Islands',
2524 'AS': 'American Samoa',
2529 'AG': 'Antigua and Barbuda',
2546 'BO': 'Bolivia, Plurinational State of',
2547 'BQ': 'Bonaire, Sint Eustatius and Saba',
2548 'BA': 'Bosnia and Herzegovina',
2550 'BV': 'Bouvet Island',
2552 'IO': 'British Indian Ocean Territory',
2553 'BN': 'Brunei Darussalam',
2555 'BF': 'Burkina Faso',
2561 'KY': 'Cayman Islands',
2562 'CF': 'Central African Republic',
2566 'CX': 'Christmas Island',
2567 'CC': 'Cocos (Keeling) Islands',
2571 'CD': 'Congo, the Democratic Republic of the',
2572 'CK': 'Cook Islands',
2574 'CI': 'Côte d\'Ivoire',
2579 'CZ': 'Czech Republic',
2583 'DO': 'Dominican Republic',
2586 'SV': 'El Salvador',
2587 'GQ': 'Equatorial Guinea',
2591 'FK': 'Falkland Islands (Malvinas)',
2592 'FO': 'Faroe Islands',
2596 'GF': 'French Guiana',
2597 'PF': 'French Polynesia',
2598 'TF': 'French Southern Territories',
2613 'GW': 'Guinea-Bissau',
2616 'HM': 'Heard Island and McDonald Islands',
2617 'VA': 'Holy See (Vatican City State)',
2624 'IR': 'Iran, Islamic Republic of',
2627 'IM': 'Isle of Man',
2637 'KP': 'Korea, Democratic People\'s Republic of',
2638 'KR': 'Korea, Republic of',
2641 'LA': 'Lao People\'s Democratic Republic',
2647 'LI': 'Liechtenstein',
2651 'MK': 'Macedonia, the Former Yugoslav Republic of',
2658 'MH': 'Marshall Islands',
2664 'FM': 'Micronesia, Federated States of',
2665 'MD': 'Moldova, Republic of',
2676 'NL': 'Netherlands',
2677 'NC': 'New Caledonia',
2678 'NZ': 'New Zealand',
2683 'NF': 'Norfolk Island',
2684 'MP': 'Northern Mariana Islands',
2689 'PS': 'Palestine, State of',
2691 'PG': 'Papua New Guinea',
2694 'PH': 'Philippines',
2698 'PR': 'Puerto Rico',
2702 'RU': 'Russian Federation',
2704 'BL': 'Saint Barthélemy',
2705 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
2706 'KN': 'Saint Kitts and Nevis',
2707 'LC': 'Saint Lucia',
2708 'MF': 'Saint Martin (French part)',
2709 'PM': 'Saint Pierre and Miquelon',
2710 'VC': 'Saint Vincent and the Grenadines',
2713 'ST': 'Sao Tome and Principe',
2714 'SA': 'Saudi Arabia',
2718 'SL': 'Sierra Leone',
2720 'SX': 'Sint Maarten (Dutch part)',
2723 'SB': 'Solomon Islands',
2725 'ZA': 'South Africa',
2726 'GS': 'South Georgia and the South Sandwich Islands',
2727 'SS': 'South Sudan',
2732 'SJ': 'Svalbard and Jan Mayen',
2735 'CH': 'Switzerland',
2736 'SY': 'Syrian Arab Republic',
2737 'TW': 'Taiwan, Province of China',
2739 'TZ': 'Tanzania, United Republic of',
2741 'TL': 'Timor-Leste',
2745 'TT': 'Trinidad and Tobago',
2748 'TM': 'Turkmenistan',
2749 'TC': 'Turks and Caicos Islands',
2753 'AE': 'United Arab Emirates',
2754 'GB': 'United Kingdom',
2755 'US': 'United States',
2756 'UM': 'United States Minor Outlying Islands',
2760 'VE': 'Venezuela, Bolivarian Republic of',
2762 'VG': 'Virgin Islands, British',
2763 'VI': 'Virgin Islands, U.S.',
2764 'WF': 'Wallis and Futuna',
2765 'EH': 'Western Sahara',
2772 def short2full(cls
, code
):
2773 """Convert an ISO 3166-2 country code to the corresponding full name"""
2774 return cls
._country
_map
.get(code
.upper())
2777 class PerRequestProxyHandler(compat_urllib_request
.ProxyHandler
):
2778 def __init__(self
, proxies
=None):
2779 # Set default handlers
2780 for type in ('http', 'https'):
2781 setattr(self
, '%s_open' % type,
2782 lambda r
, proxy
='__noproxy__', type=type, meth
=self
.proxy_open
:
2783 meth(r
, proxy
, type))
2784 return compat_urllib_request
.ProxyHandler
.__init
__(self
, proxies
)
2786 def proxy_open(self
, req
, proxy
, type):
2787 req_proxy
= req
.headers
.get('Ytdl-request-proxy')
2788 if req_proxy
is not None:
2790 del req
.headers
['Ytdl-request-proxy']
2792 if proxy
== '__noproxy__':
2793 return None # No Proxy
2794 if compat_urlparse
.urlparse(proxy
).scheme
.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
2795 req
.add_header('Ytdl-socks-proxy', proxy
)
2796 # youtube-dl's http/https handlers do wrapping the socket with socks
2798 return compat_urllib_request
.ProxyHandler
.proxy_open(
2799 self
, req
, proxy
, type)
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # Interpret *data* as a little-endian integer: reverse, then parse hex.
    message = int(binascii.hexlify(data[::-1]), 16)
    ciphertext = pow(message, exponent, modulus)
    return '%x' % ciphertext
def encode_base_n(num, n, table=None):
    """Render non-negative *num* in base *n*, using *table* as the digit set.

    Without *table*, digits come from 0-9a-zA-Z (so n may be up to 62).
    Raises ValueError when *n* exceeds the digit table length.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        digits.append(table[num % n])
        num = num // n
    return ''.join(reversed(digits))
2837 def decode_packed_codes(code
):
2839 r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
2841 obfucasted_code
, base
, count
, symbols
= mobj
.groups()
2844 symbols
= symbols
.split('|')
2849 base_n_count
= encode_base_n(count
, base
)
2850 symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
2853 r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)],
2857 def parse_m3u8_attributes(attrib
):
2859 for (key
, val
) in re
.findall(r
'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib
):
2860 if val
.startswith('"'):