Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
Merge tag 'upstream/2014.11.23'
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import unicode_literals
5
6 import calendar
7 import codecs
8 import contextlib
9 import ctypes
10 import datetime
11 import email.utils
12 import errno
13 import gzip
14 import itertools
15 import io
16 import json
17 import locale
18 import math
19 import os
20 import pipes
21 import platform
22 import re
23 import ssl
24 import socket
25 import struct
26 import subprocess
27 import sys
28 import tempfile
29 import traceback
30 import xml.etree.ElementTree
31 import zlib
32
33 from .compat import (
34 compat_chr,
35 compat_getenv,
36 compat_html_entities,
37 compat_parse_qs,
38 compat_str,
39 compat_urllib_error,
40 compat_urllib_parse,
41 compat_urllib_parse_urlparse,
42 compat_urllib_request,
43 compat_urlparse,
44 )
45
46
# This is not clearly defined otherwise
# (the type of a compiled pattern is obtained by compiling an empty regex).
compiled_regex_type = type(re.compile(''))

# Default HTTP headers; YoutubeDLHandler.http_request adds each of them to a
# request that does not already carry a header of that name.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
57
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the lookup
    fails, or the reported codec cannot encode text, 'UTF-8' is returned.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported codec actually exists and is usable
        'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (unknown codec, broken locale, ...), but no
        # longer swallows KeyboardInterrupt / SystemExit
        pref = 'UTF-8'

    return pref
71
72
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first dumped into a temporary file created in the same
    directory as fn, which is then renamed over fn. If anything goes wrong
    the temporary file is removed and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except:
        # Catch-all on purpose: clean up the temporary file whatever
        # happened, then propagate the original exception unchanged
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
125
126
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element matching xpath whose attribute key equals val """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, unicode):
            xpath = xpath.encode('ascii')

        candidates = (el for el in node.findall(xpath) if el.attrib.get(key) == val)
        return next(candidates, None)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Only accept names/values that are safe to embed in the predicate
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        return node.find(xpath + u"[@%s='%s']" % (key, val))
145
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{uri}tag' using ns_map.

    Steps without a prefix are kept as they are.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
158
159
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element matching xpath under node.

    If nothing matches, return None, or raise ExtractorError when fatal is
    set (name, if given, replaces the xpath in the error message).
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None:
        return found.text
    if not fatal:
        return None
    if name is None:
        name = xpath
    raise ExtractorError('Could not find XML element %s' % name)
172
173
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id in the passed HTML document"""
    return get_element_by_attribute(attribute="id", value=id, html=html)
177
178
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document

    Returns None when no tag carries attribute=value; otherwise the
    HTML-unescaped text between the opening tag and its closing tag.
    """

    # Verbose pattern: tag name, any attributes, the wanted attribute, any
    # further attributes, then the (non-greedy) content up to the closing tag
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s*>
         (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if not found:
        return None

    content = found.group('content')
    # Drop one surrounding character on each side if the content starts
    # with a quote
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
200
201
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Newlines become spaces, <br> and </p><p> boundaries become newlines,
    all remaining tags are stripped and HTML entities are unescaped.
    None is passed through unchanged, mirroring unescapeHTML.
    """
    if html is None:  # Convenience for sanitizing optional descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
213
214
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename '-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final path component is rewritten: the directory part
        # legitimately contains separators (and possibly a drive colon)
        # that must not be replaced.
        head, tail = os.path.split(filename)
        alt_filename = os.path.join(
            head, re.sub('[/<>:"\\|\\\\?\\*]', '#', tail))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
248
249
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
257
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally bans shell-unfriendly punctuation,
        # whitespace and everything outside ASCII
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs are returned as they come out of the per-character mapping
        return result

    # Collapse runs of underscores and trim them from both ends
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result if result else '_'
289
def orderedSet(iterable):
    """ Return a list of the elements of iterable without duplicates, keeping first-seen order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
297
298
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and decimal
    (#NNN) or hexadecimal (#xHHH) character references are decoded;
    anything else, including references to invalid code points, is
    returned in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # The whole entity must be a numeric reference: a partial match would
    # silently decode only a prefix (e.g. '#x2F' as chr(2)).
    mobj = re.match(
        r'#(?:[xX](?P<hex>[0-9a-fA-F]+)|(?P<dec>[0-9]+))\Z', entity)
    if mobj is not None:
        if mobj.group('hex') is not None:
            codepoint = int(mobj.group('hex'), 16)
        else:
            codepoint = int(mobj.group('dec'), 10)
        try:
            return compat_chr(codepoint)
        except (ValueError, OverflowError):
            # Not a valid code point; keep the literal text
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
317
318
def unescapeHTML(s):
    """Replace every '&...;' entity in s via _htmlentity_transform; None is passed through"""
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
326
327
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is going to be passed to a subprocess call

    Returns s unchanged where unicode file names can be used directly,
    otherwise s encoded to bytes (unencodable characters are dropped).
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up have Unicode APIs
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        # Pass the unicode string directly to use those APIs
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
354
355
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(..., for_subprocess=True)"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
363
364
def decodeOption(optval):
    """Return an option value as text: bytes are decoded with the preferred encoding, None stays None"""
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())

        assert isinstance(optval, compat_str)
    return optval
373
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that fits is used; exactly one hour is '1:00:00' and
    exactly one minute is '1:00'.
    """
    # Inclusive bounds: the original strict comparisons rendered 3600 as
    # '60:00' and 60 as '60'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
381
382
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Build the HTTPS handler appropriate for the running Python version.

    opts_no_check_certificate disables certificate verification where the
    interpreter supports verification at all. Remaining keyword arguments
    are forwarded to the handler constructor.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                # Prefer TLSv1, retry with the SSLv23 protocol setting on failure
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
        # We are the client authenticating a server, hence SERVER_AUTH.
        # (CLIENT_AUTH builds a server-side context that never verifies
        # the peer certificate.)
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
        if opts_no_check_certificate:
            # check_hostname must be switched off first, otherwise setting
            # CERT_NONE raises ValueError
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
422
423
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        video_id, if given, is prefixed to the message; cause is appended to it.
        """

        # Errors raised while handling a network/availability problem are
        # always treated as expected
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            update_cmd = (
                'type youtube-dl -U to update'
                if ytdl_is_updateable()
                else 'see https://yt-dl.org/update on how to update')
            msg += ''.join([
                '; please report this issue on https://yt-dl.org/bug .',
                ' Make sure you are using the latest version; %s.' % update_cmd,
                ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
            ])
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none"""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
456
457
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
461
462
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that callers can inspect the underlying failure
        self.exc_info = exc_info
474
475
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
483
484
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        # Initialise the base class too, so that str(exc) and exc.args
        # carry the message on every Python version (Python 2 leaves them
        # empty otherwise)
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
493
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
497
498
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
506
507
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Give the exception a readable message instead of leaving the
        # base class uninitialised
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
522
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data, trying zlib.decompress with negative wbits first and with defaults second"""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the status code.

        When addinfourl has no getcode attribute, the object is built
        without the code and the code is set as an attribute afterwards.
        """
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Add the standard headers to req and process the internal Youtubedl-* headers"""
        # Fill in every standard header the request does not set itself
        for h, v in std_headers.items():
            if h not in req.headers:
                req.add_header(h, v)
        # Internal marker header: drop Accept-encoding and the marker itself
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        # Internal marker header: its value replaces the User-agent header
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decompressed copy if it is gzip- or deflate-encoded"""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; the first
                # attempt that decompresses wins
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the original failure
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic gets exactly the same treatment
    https_request = http_request
    https_response = http_response
608
609
def parse_iso8601(date_str, delimiter='T'):
    """ Return a UNIX timestamp from the given date

    date_str is 'YYYY-MM-DD<delimiter>HH:MM:SS', optionally followed by
    fractional seconds and 'Z' or a '+HH:MM' / '-HHMM' style UTC offset.
    None is passed through.
    """

    if date_str is None:
        return None

    utc_offset = datetime.timedelta()
    tz_match = re.search(
        r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if tz_match:
        # Cut the fraction/timezone suffix off before parsing the rest
        date_str = date_str[:-len(tz_match.group(0))]
        if tz_match.group('sign'):
            direction = 1 if tz_match.group('sign') == '+' else -1
            utc_offset = datetime.timedelta(
                hours=direction * int(tz_match.group('hours')),
                minutes=direction * int(tz_match.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, date_format) - utc_offset
    return calendar.timegm(utc_time.timetuple())
633
634
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Returns None if date_str is None or cannot be interpreted.
    """

    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    known_formats = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%d/%m/%y',
        '%Y/%m/%d %H:%M:%S',
        '%d/%m/%Y %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]

    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for fmt in known_formats:
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is not None:
        return upload_date

    # Last resort: let the e-mail date parser have a go
    parsed = email.utils.parsedate_tz(date_str)
    if parsed:
        upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
    return upload_date
682
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from url (query string ignored); fall back to default_ext"""
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    # Only purely alphanumeric suffixes count as an extension
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
691
def subtitles_filename(filename, sub_lang, sub_format):
    """Replace the extension of filename with '<sub_lang>.<sub_format>'"""
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + sub_lang + '.' + sub_format
694
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if the string matches neither form.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    # Raw string, so that \d is a regex escape rather than an (invalid)
    # string escape
    match = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'  # timedelta keywords are plural: days, weeks
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
720
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
729
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the smallest / largest representable date.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date is parsed like start/end
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
755
756
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode byte strings with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
765
766
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps the file descriptors 1 and 2 to the values handed to GetStdHandle
    # (presumably the Win32 STD_OUTPUT_HANDLE / STD_ERROR_HANDLE constants)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # kernel32 function prototypes
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, handles that are not character
        # devices, and handles for which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at once, stopping before the next
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written as 2 units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
837
838
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush.

    Depending on the stream, s is written through the Windows console
    helper, as encoded bytes (unencodable characters are dropped), or
    directly as text.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
859
860
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints (empty input gives [])"""
    if not bs:
        return []
    first = bs[0]
    if isinstance(first, int):  # Python 3
        return list(bs)
    return [ord(ch) for ch in bs]
868
869
def intlist_to_bytes(xs):
    """Pack the list of byte values xs into a bytes object (empty input gives b'')"""
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
874
875
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f): via LockFileEx /
# UnlockFileEx from kernel32 on Windows, via fcntl.flock elsewhere.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to the lock calls
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock f from offset 0; raises OSError if LockFileEx fails"""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file passes the same
        # pointer to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is presumably LOCKFILE_EXCLUSIVE_LOCK -- confirm against the
        # Win32 documentation
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError if UnlockFileEx fails"""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive (LOCK_EX) or shared (LOCK_SH) flock on f"""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock held on f"""
        fcntl.flock(f, fcntl.LOCK_UN)
939
940
class locked_file(object):
    """Context manager around a file that is locked while the context is active.

    The file is opened in the constructor; the lock is shared for mode 'r'
    and exclusive for 'a' and 'w'.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the file handle if locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
970
971
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
975
976
def shell_quote(args):
    """Return args as one space-separated string with every argument quoted by pipes.quote"""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(pipes.quote(as_text(arg)) for arg in args)
986
987
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yield the element that ends the run
    (the first element e for which pred(e) is false) """
    for item in seq:
        yield item
        if pred(item):
            continue
        return
995
996
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded and appended to url as a fragment.
    """
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
1003
1004
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data); URLs without smuggled data give (smug_url, default)"""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    encoded = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(encoded)
1012
1013
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. '1.50KiB'; None gives 'N/A'"""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # The power of 1024 selects the unit; zero has no logarithm
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    suffix = units[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
1026
1027
def get_term_width():
    """Return the terminal width in columns, or None if it cannot be determined.

    The COLUMNS environment variable takes precedence; otherwise the
    output of 'stty size' is consulted.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            # Garbage in COLUMNS: fall back to asking stty
            pass

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort (no stty, no tty, unexpected output, ...), but
        # KeyboardInterrupt / SystemExit are no longer swallowed
        pass
    return None
1042
1043
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name

    Returns None if name is not one of the twelve full English month names.
    """

    ENGLISH_NAMES = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December']
    for number, english in enumerate(ENGLISH_NAMES, 1):
        if english == name:
            return number
    return None
1054
1055
def fix_xml_ampersands(xml_str):
    """Escape every bare '&' in xml_str as '&amp;'.

    Ampersands that already introduce one of the entities amp, lt, gt,
    apos or quot, or a decimal/hexadecimal character reference of up to
    four digits, are left untouched.
    """
    bare_ampersand = (
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return re.sub(bare_ampersand, '&amp;', xml_str)
1062
1063
def setproctitle(title):
    """Set the name of the current process through libc's prctl.

    title must be a text string.  The call is a silent no-op when
    libc.so.6 cannot be loaded or does not provide prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes allocates len + 1 bytes, so the
    # string handed to prctl is always NUL-terminated.  Sizing the buffer to
    # exactly len(title_bytes) left no room for the terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
1077
1078
def remove_start(s, start):
    """Return s without its leading start; s is unchanged if it has none."""
    return s[len(start):] if s.startswith(start) else s
1083
1084
def remove_end(s, end):
    """Return s without its trailing end; s is unchanged if it has none.

    An empty end leaves s untouched.
    """
    # Every string ends with '', and s[:-0] is s[:0] == '', so an empty
    # suffix must be excluded or the whole string would be dropped.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1089
1090
def url_basename(url):
    """Return the last '/'-separated segment of the path component of url.

    Leading and trailing slashes of the path are ignored.
    """
    parsed = compat_urlparse.urlparse(url)
    segments = parsed.path.strip('/').split('/')
    return segments[-1]
1094
1095
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always reports HEAD as its HTTP method."""

    def get_method(self):
        """Return the HTTP method name used for this request."""
        return "HEAD"
1099
1100
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, or return default when there is no value.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (missing attributes count as None).  None and the
    empty string yield default; anything else is converted with int(),
    multiplied by invscale and floor-divided by scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1108
1109
def str_or_none(v, default=None):
    """Return v converted to a text string, or default if v is None."""
    if v is None:
        return default
    return compat_str(v)
1112
1113
def str_to_int(int_str):
    """Parse an int from a string, ignoring ',', '.' and '+' characters.

    A more relaxed version of int_or_none; None is passed through.
    """
    if int_str is not None:
        return int(re.sub(r'[,\.\+]', '', int_str))
    return None
1120
1121
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale / scale.

    Returns default when v is None.
    """
    if v is None:
        return default
    return float(v) * invscale / scale
1124
1125
def parse_duration(s):
    """Parse a human-readable duration string into a number of seconds.

    Accepts forms such as '1:23:45', '1h2m3s', '3 min 5 s' or '12.5',
    optionally preceded by 'T'.  Returns None when s is None or does not
    match; the result is a float only if fractional seconds are present.
    """
    if s is None:
        return None

    match = re.match(
        r'''(?ix)T?
        (?:
            (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
            (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
        )?
        (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''',
        s.strip())
    if match is None:
        return None

    total = int(match.group('secs'))
    for unit, seconds_per_unit in (('mins', 60), ('hours', 60 * 60)):
        amount = match.group(unit)
        if amount:
            total += int(amount) * seconds_per_unit
    # The 'ms' group holds the fractional part including its leading dot
    fraction = match.group('ms')
    if fraction:
        total += float(fraction)
    return total
1149
1150
def prepend_extension(filename, ext):
    """Insert ext in front of the existing extension of filename.

    For example ('video.mp4', 'temp') gives 'video.temp.mp4'; a filename
    without extension simply gets '.' + ext appended.
    """
    parts = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(parts[0], ext, parts[1])
1154
1155
def check_executable(exe, args=[]):
    """Check whether the binary exe can be run, returning its name if so.

    exe is started with args (typically something producing short output,
    like -version) and its output is discarded.  Returns False when
    starting the process raises OSError, e.g. because exe is not in PATH.
    """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    else:
        return exe
1164
1165
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized='present'):
    """Return the version reported by the executable exe.

    exe is run with args and the first line of its combined
    stdout/stderr output is searched with version_re, whose first group
    is the version.  Returns that group, unrecognized if the pattern does
    not match, or False if the executable could not be started.

    The default pattern lists '-' first in the character class so that it
    is a literal hyphen; written as '._-a' it formed a range and versions
    such as '1.2.3-4ubuntu' were cut off at the hyphen.
    """
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    return unrecognized
1183
1184
class PagedList(object):
    """Base class for paged lists; subclasses are expected to provide
    getslice(), which __len__ relies on."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
1189
1190
class OnDemandPagedList(PagedList):
    """Paged list that fetches its pages lazily through pagefunc.

    pagefunc is called with a zero-based page number and must return an
    iterable with the entries of that page.  A page holding fewer than
    pagesize entries is taken to be the last one.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list.

        Only the pages overlapping the requested range are fetched;
        end=None means "up to the last page".
        """
        # An empty or inverted range needs no page at all.  Without this
        # guard, end == start on a page boundary returned a whole page and
        # an end lying on an earlier page than start was never recognised,
        # so every remaining page was fetched.
        if end is not None and end <= start:
            return []

        res = []
        # Counting starts at the page containing start, so no page has to
        # be skipped inside the loop.
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            page_results = list(self._pagefunc(pagenum))

            # Offset of start inside this page (non-zero on the first
            # fetched page only)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry if end falls on this
            # page, otherwise take the page up to its end
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
1232
1233
class InAdvancePagedList(PagedList):
    """Paged list whose number of pages is known in advance.

    pagefunc is called with a zero-based page number and must return an
    iterable with the entries of that page; pagecount is the total number
    of pages and pagesize the number of entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list.

        Only pages overlapping the range are fetched, and never a page
        number at or beyond pagecount; end=None means "up to the last
        page".
        """
        # An empty or inverted range yields nothing; a negative remaining
        # count would otherwise be used as a (wrong) negative slice bound.
        if end is not None and end <= start:
            return []

        res = []
        start_page = start // self._pagesize
        # Never ask for pages past the known page count, even when end
        # lies beyond the last entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page has leading entries to drop
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested range
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1261
1262
def uppercase_escape(s):
    """Expand eight-digit uppercase-U escapes found literally in s.

    Every occurrence of a backslash followed by 'U' and eight hexadecimal
    digits is replaced by the character it denotes; the rest of s is left
    as it is.
    """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1269
1270
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986

    On Python 2 a unicode string is encoded as UTF-8 before quoting.
    The characters %/;:@&=+$,!~*'()?#[] are passed to quote() as safe.
    """
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, unicode)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
1276
1277
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The path, params, query and fragment components are escaped with
    escape_rfc3986; the remaining components are kept as parsed.
    """
    parts = compat_urllib_parse_urlparse(url)
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
1287
# struct_pack / struct_unpack: variants of struct.pack / struct.unpack that
# accept a text format spec on every supported Python version.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        bytes_spec = (
            spec.encode('ascii') if isinstance(spec, compat_str) else spec)
        return struct.pack(bytes_spec, *args)

    def struct_unpack(spec, *args):
        bytes_spec = (
            spec.encode('ascii') if isinstance(spec, compat_str) else spec)
        return struct.unpack(bytes_spec, *args)
else:
    # Text specs work natively, so the stdlib functions are used directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
1304
1305
def read_batch_urls(batch_fd):
    """Return the list of URLs contained in the batch file batch_fd.

    batch_fd is iterated line by line and closed afterwards; lines may be
    text or UTF-8 encoded bytes.  A leading UTF-8 byte order mark is
    dropped, surrounding whitespace is stripped, and empty lines as well
    as lines starting with '#', ';' or ']' are skipped.
    """
    # A UTF-8 BOM appears as U+FEFF when the data was decoded as UTF-8 and
    # as the three characters below when it was decoded with a single-byte
    # codec such as Latin-1.
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1320
1321
def urlencode_postdata(*args, **kargs):
    """Urlencode the given arguments and return the result as ASCII bytes.

    All arguments are forwarded unchanged to urlencode().
    """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1324
1325
# etree_iter(n) iterates over the elements of an ElementTree node n,
# preferring Element.iter where the running Python provides it.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1330
1331
def parse_xml(s):
    """Parse the XML document in the text string s, ignoring doctypes.

    s is encoded as UTF-8 before parsing and the root element is
    returned.  On Python 2, element texts that come back as byte strings
    are decoded to text.
    """
    class DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(
        target=DoctypeIgnoringBuilder())
    # The custom parser is only passed along on Python 2.7 and newer
    if sys.version_info >= (2, 7):
        kwargs = {'parser': parser}
    else:
        kwargs = {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            text = node.text
            if text is not None and not isinstance(text, compat_str):
                node.text = text.decode('utf-8')
    return tree
1347
1348
# Age limit associated with each US rating label; used by parse_age_limit
# for values that are not plain numbers.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
1356
1357
def parse_age_limit(s):
    """Parse an age limit given as a string and return it as an int.

    Accepts one or two digits with an optional trailing '+' (e.g. '18+');
    any other value is looked up in US_RATINGS.  Returns None when s is
    None or not recognised.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
1363
1364
def strip_jsonp(code):
    """Strip a JSONP wrapper from code and return the wrapped payload.

    The wrapper is a callback name followed by the parenthesised payload,
    optionally followed by ';' and '//' comments.  Input that does not
    have this shape is returned unchanged.
    """
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_wrapper, r'\1', code)
1368
1369
def js_to_json(code):
    """Convert JavaScript object literal code into JSON text.

    Bare identifiers (other than true, false and null) and single-quoted
    strings are turned into double-quoted strings, double-quoted strings
    are kept, and a trailing comma before a closing ']' is removed.
    """
    # How escapes inside a single-quoted string map into a double-quoted one
    requoted = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: requoted[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', lambda m: m.group(1), res)
1393
1394
def qualities(quality_ids):
    """Build a function ranking quality ids by their position in quality_ids.

    The returned function maps an id to its index in quality_ids, or to
    -1 if the id is not listed.
    """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
1403
1404
# Default output filename template, expanded with %-formatting from a
# mapping providing the 'title', 'id' and 'ext' keys.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1406
1407
def limit_length(s, length):
    """Shorten s to at most length characters, marking the cut with '...'.

    Strings that already fit, and None, are returned unchanged.
    """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
1416
1417
def version_tuple(v):
    """Split the dotted version string v into a list of ints.

    Despite the name the result is a list.  Raises ValueError if a
    component is not an integer.
    """
    return list(map(int, v.split('.')))
1420
1421
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    Both are dotted version strings compared component-wise.  When
    version is empty or either string cannot be parsed, the answer is
    "not outdated" if assume_new is true and "outdated" otherwise.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
1429
1430
def ytdl_is_updateable():
    """Return whether youtube-dl can be updated with -U.

    That is the case when this module was loaded through a zipimporter
    or when sys has a 'frozen' attribute.
    """
    from zipimport import zipimporter

    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')