]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
Start new release.
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import unicode_literals
5
6 import calendar
7 import codecs
8 import contextlib
9 import ctypes
10 import datetime
11 import email.utils
12 import errno
13 import gzip
14 import itertools
15 import io
16 import json
17 import locale
18 import math
19 import os
20 import pipes
21 import platform
22 import re
23 import ssl
24 import socket
25 import struct
26 import subprocess
27 import sys
28 import tempfile
29 import traceback
30 import xml.etree.ElementTree
31 import zlib
32
33 from .compat import (
34 compat_chr,
35 compat_getenv,
36 compat_html_entities,
37 compat_parse_qs,
38 compat_str,
39 compat_urllib_error,
40 compat_urllib_parse,
41 compat_urllib_parse_urlparse,
42 compat_urllib_request,
43 compat_urlparse,
44 shlex_quote,
45 )
46
47
# The type of a compiled regular expression is not clearly defined otherwise,
# so it is taken from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP headers. YoutubeDLHandler.http_request adds each of these to
# any outgoing request that does not already carry a header of that name.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
58
59
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or reports a codec Python cannot encode with, 'UTF-8' is
    returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; the reported name may be empty or unknown.
        'TEST'.encode(pref)
    except Exception:
        # Deliberately broad, but no longer a bare except: KeyboardInterrupt
        # and SystemExit must not be swallowed here.
        pref = 'UTF-8'

    return pref
73
74
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file in the same directory as
    fn and then renamed over fn. If anything fails, the temporary file is
    removed and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except:
        # Bare except is intentional: the temporary file is cleaned up on
        # any failure (including interrupts) and the error is re-raised.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
127
128
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Return the first element matching xpath[@key=val], or None """
        # key and val are interpolated into the XPath expression, so only a
        # conservative character set is accepted.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Return the first element found by xpath whose key attribute is val """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, unicode):
            xpath = xpath.encode('ascii')

        candidates = (
            elem for elem in node.findall(xpath)
            if elem.attrib.get(key) == val)
        return next(candidates, None)
147
148 # On python2.6 the xml.etree.ElementTree.Element methods don't support
149 # the namespace parameter
150
151
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path to '{namespace-uri}tag' using ns_map.

    Steps without a prefix are kept unchanged.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
162
163
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element matching xpath below node.

    If nothing matches, return None, or raise ExtractorError when fatal is
    set (name, if given, replaces the xpath in the error message).
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None:
        return found.text
    if not fatal:
        return None
    label = xpath if name is None else name
    raise ExtractorError('Could not find XML element %s' % label)
176
177
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id in the given HTML document"""
    return get_element_by_attribute('id', id, html)
181
182
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag carrying attribute=value in the given HTML document

    Returns None if no such tag is found; HTML entities in the content are
    unescaped.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if found is None:
        return None

    content = found.group('content')
    # Content that starts with a quote has its first and last character dropped
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
204
205
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Line breaks in the markup are dropped, <br> tags and paragraph
    boundaries become newlines, all remaining tags are stripped and HTML
    entities are unescaped. None is passed through unchanged so callers can
    feed optional fields without checking first.
    """
    if html is None:
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
217
218
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    A filename of '-' selects standard output (its binary buffer where one
    exists).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars. Only the
        # final path component is rewritten: the directory part must keep
        # its separators (and drive colon) to remain a usable path.
        head, tail = os.path.split(filename)
        alt_filename = os.path.join(head, re.sub(r'[/<>:"|\\?*]', '#', tail))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
252
253
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
261
262
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def substitute(ch):
        code = ord(ch)
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    cleaned = ''.join(substitute(ch) for ch in s)
    if is_id:
        # IDs are returned exactly as substituted, without any tidying up
        return cleaned

    while '__' in cleaned:
        cleaned = cleaned.replace('__', '_')
    cleaned = cleaned.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    return cleaned if cleaned else '_'
294
295
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, in first-seen order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
303
304
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and decimal
    (#38) or hexadecimal (#x26) character references are decoded; anything
    else is returned in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hexadecimal references may contain a-f digits, and the whole entity
    # has to be numeric; otherwise '#x2F' would be decoded as chr(0x2).
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)$', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Not a valid code point: fall through to the literal form
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
323
324
def unescapeHTML(s):
    """Replace every '&...;' entity in s by the result of _htmlentity_transform.

    None is passed through unchanged.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
332
333
def encodeFilename(s, for_subprocess=False):
    """
    Encode a file name for use with the file system (or a subprocess) APIs.

    @param s The name of the file
    @param for_subprocess Whether the result is passed to a subprocess call
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up offer Unicode APIs.
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return s.encode('utf-8' if encoding is None else encoding, 'ignore')
360
361
def encodeArgument(s):
    """Encode a command line argument the way encodeFilename does for subprocesses."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings: decode as ASCII first.
    # This branch can become an internal error once all post processors
    # pass text strings.
    return encodeFilename(s.decode('ascii'), True)
369
370
def decodeOption(optval):
    """Return optval as text, decoding bytes with the preferred encoding; None stays None."""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
379
380
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The boundaries are inclusive: exactly one hour is '1:00:00' and exactly
    one minute is '1:00'.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
388
389
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Build the HTTPS handler used for outgoing requests.

    opts_no_check_certificate disables certificate verification where the
    running Python supports configuring it; kwargs are forwarded to the
    handler constructor.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                # Prefer TLSv1, fall back to protocol negotiation
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
        # Purpose.SERVER_AUTH is the setting for client sockets that
        # authenticate servers; CLIENT_AUTH builds a server-side context
        # that does not verify the peer certificate.
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
        if opts_no_check_certificate:
            # check_hostname has to be switched off first, otherwise
            # setting verify_mode to CERT_NONE raises ValueError.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
429
430
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is appended to the message; video_id, if given, is prepended to it.
        """

        # Errors raised while one of these exceptions is being handled are
        # always treated as expected.
        expected_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_types:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            update_cmd = (
                'type youtube-dl -U to update' if ytdl_is_updateable()
                else 'see https://yt-dl.org/update on how to update')
            msg += (
                '; please report this issue on https://yt-dl.org/bug .' +
                ' Make sure you are using the latest version; %s.' % update_cmd +
                ' Be sure to call youtube-dl with the --verbose flag and include its complete output.')
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
464
465
class RegexNotFoundError(ExtractorError):
    """Raised when a regular expression did not match."""
469
470
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may raise this when they are not configured to
    continue on errors; the exception carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
483
484
class SameFileError(Exception):
    """Same File exception.

    Raised by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
492
493
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # Initialise the base class too, so that str(exc) and exc.args carry
        # the message however the exception was constructed.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
503
504
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
508
509
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available for
    that video.
    """
517
518
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Give the exception a readable message instead of leaving the base
        # class uninitialised.
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
533
534
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and a zlib-wrapped one second."""
        try:
            # Negative wbits: no zlib header/trailer expected
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the status code.

        When addinfourl has no getcode attribute, the three-argument
        constructor is used and the code is set on the instance afterwards.
        """
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Add the standard headers to req and apply the Youtubedl-* pseudo headers."""
        for h, v in std_headers.items():
            if h not in req.headers:
                req.add_header(h, v)
        # Youtubedl-no-compression: drop Accept-encoding, then the marker itself
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        # Youtubedl-user-agent: its value replaces the User-agent header
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decompressed copy if it is gzip or deflate encoded."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts succeeds, the first error is raised.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic gets exactly the same treatment
    https_request = http_request
    https_response = http_response
620
621
def parse_iso8601(date_str, delimiter='T'):
    """ Return a UNIX timestamp from the given date

    delimiter is the character between the date and the time part. A
    trailing 'Z' or +HH:MM / -HHMM offset (optionally preceded by
    fractional seconds) is honoured. None is passed through.
    """

    if date_str is None:
        return None

    utc_offset = datetime.timedelta()
    tz_match = re.search(
        r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if tz_match:
        # Cut the matched suffix off before handing the rest to strptime
        date_str = date_str[:-len(tz_match.group(0))]
        if tz_match.group('sign'):
            direction = 1 if tz_match.group('sign') == '+' else -1
            utc_offset = datetime.timedelta(
                hours=direction * int(tz_match.group('hours')),
                minutes=direction * int(tz_match.group('minutes')))

    layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, layout) - utc_offset
    return calendar.timegm(utc_time.timetuple())
645
646
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Returns None for None input or when no known format matches.
    """

    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    cleaned = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', cleaned)
    known_formats = (
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%d/%m/%y',
        '%Y/%m/%d %H:%M:%S',
        '%d/%m/%Y %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    )
    result = None
    # Every format is tried; the last one that parses decides the result
    for fmt in known_formats:
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue
    if result is None:
        # Last resort: RFC 2822 style dates
        fields = email.utils.parsedate_tz(cleaned)
        if fields:
            result = datetime.datetime(*fields[:6]).strftime('%Y%m%d')
    return result
694
695
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from url, or return default_ext.

    The guess is whatever follows the last '.' before the query string; it
    is only accepted if it is purely alphanumeric.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
704
705
def subtitles_filename(filename, sub_lang, sub_format):
    """Replace the extension of filename by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + sub_lang + '.' + sub_format
708
709
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if the string matches neither form.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta has no month/year unit, so approximate them in days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
735
736
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Strings in any other format are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
745
746
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start or end leaves the range open on that side.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a string) is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % tuple(bound.isoformat() for bound in (self.start, self.end))
776
777
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
786
787
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is the text to write and out the target stream. Only streams whose
    file descriptor is 1 or 2 and whose handle is an actual console are
    written here, through WriteConsoleW. Raises OSError if WriteConsoleW
    reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (presumably STD_OUTPUT_HANDLE / STD_ERROR_HANDLE - see the Win32 docs)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for an invalid/missing handle, a handle that is not of
        # character type, or one for which GetConsoleMode fails.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters per call, stopping before the next
        # non-BMP character.
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2.
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever was actually written
            s = s[written.value:]
    return True
858
859
def write_string(s, out=None, encoding=None):
    """Write the text s to out (default: sys.stderr) and flush it.

    Byte-oriented streams receive s encoded with encoding, falling back to
    the stream's own or the preferred encoding; characters that cannot be
    encoded are dropped.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    console_candidate = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if console_candidate and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        codec = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(codec, 'ignore'))
    else:
        out.write(s)
    out.flush()
880
881
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints."""
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Indexing yields one-character strings here (e.g. Python 2 str)
        return [ord(ch) for ch in bs]
    return list(bs)  # Python 3
889
890
def intlist_to_bytes(xs):
    """Pack a list of byte values into a bytes object."""
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
895
896
# Cross-platform file locking
# Both branches define _lock_file(f, exclusive) and _unlock_file(f).
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure handed to LockFileEx / UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high part of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can pass the same
        # structure again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 for an exclusive lock and 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError if UnlockFileEx fails."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive (LOCK_EX) or shared (LOCK_SH) flock on f."""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock held on f."""
        fcntl.flock(f, fcntl.LOCK_UN)
960
961
class locked_file(object):
    """A file that is locked for as long as it is used as a context manager.

    Mode 'r' takes a shared lock, modes 'a' and 'w' an exclusive one. The
    file is closed when the with block is left.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 implementation of _lock_file raises OSError, which
            # is not an IOError on Python 2; close the file in either case
            # so the handle does not leak.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
991
992
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
996
997
def shell_quote(args):
    """Return args joined into a single shell-quoted command line string.

    Byte string arguments are decoded with the file system encoding first.
    """
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote is the compat name this module already imports;
        # pipes.quote is an undocumented alias and the pipes module no
        # longer exists in current Python versions.
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
1007
1008
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yield the first element for
    which pred(e) is false before stopping """
    for item in seq:
        yield item
        if pred(item):
            continue
        break
1016
1017
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded and appended to url as a '#' fragment. """

    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
1024
1025
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url into (url, data).

    URLs without smuggled data are returned as (smug_url, default).
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(encoded)
1033
1034
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. '1.50KiB'.

    None gives 'N/A'; a str is converted with float() first.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log(0) is undefined, so zero is pinned to the plain byte unit
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    suffix = units[exponent]
    scaled = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (scaled, suffix)
1047
1048
def parse_filesize(s):
    """Parse a size such as '5 GB' or '1.2MiB' into a number of bytes.

    Returns None for None input or when s does not start with a number
    followed by a known unit.
    """
    if s is None:
        return None

    # 'XiB' and the (incorrect, inofficial) lower-case 'xB' are powers of
    # 1024; 'XB' and 'Xb' are powers of 1000.
    unit_table = {'B': 1, 'b': 1}
    for power, letter in enumerate('KMGTPEZY', 1):
        binary, decimal = 1024 ** power, 1000 ** power
        unit_table.update({
            letter + 'iB': binary,
            letter + 'B': decimal,
            letter.lower() + 'B': binary,
            letter + 'b': decimal,
        })

    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % alternatives, s)
    if found is None:
        return None

    return int(float(found.group('num')) * unit_table[found.group('unit')])
1098
1099
def get_term_width():
    """ Return the terminal width in columns as an int, or None if unknown.

    The COLUMNS environment variable is consulted first; if it is unset,
    empty or not a number, the second field of `stty size` is used. """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            pass  # Malformed COLUMNS, fall back to asking the terminal

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only (stty missing, no tty, unexpected output, ...).
        # Not a bare except, so KeyboardInterrupt/SystemExit still propagate.
        return None
1114
1115
def month_by_name(name):
    """ Return the 1-based number of a month given its full English name
    (independently of the locale), or None if the name is not recognized """
    months = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December')
    if name in months:
        return months.index(name) + 1
    return None
1126
1127
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in xml_str by '&amp;'.

    Ampersands that already start one of the predefined entities (amp, lt,
    gt, apos, quot) or a numeric character reference are left untouched.
    """
    # Numeric references need at least one digit and may have any number of
    # them: code points above U+FFFF take more than four digits, and
    # leading zeros are allowed.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]+;|#[0-9]+;)',
        '&amp;',
        xml_str)
1134
1135
def setproctitle(title):
    """ Set the name of the current process to title, if possible.

    title must be a text string. Does nothing when libc.so.6 cannot be
    loaded or the loaded libc has no prctl. """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes allocates len + 1 bytes, so the
    # string handed to prctl is always NUL-terminated. Sizing it to
    # len(title_bytes) leaves no room for the terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
1149
1150
def remove_start(s, start):
    """ Return s without the leading start, if s begins with it; else s """
    return s[len(start):] if s.startswith(start) else s
1155
1156
def remove_end(s, end):
    """ Return s without the trailing end, if s ends with it; else s """
    if s.endswith(end):
        # Use a non-negative bound: with an empty end, s[:-0] is s[:0],
        # which would wrongly return '' instead of s
        return s[:len(s) - len(end)]
    return s
1161
1162
def url_basename(url):
    """ Return the last component of the path of url, ignoring slashes
    at either end of the path """
    trimmed = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed.rpartition('/')[2]
1166
1167
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        return 'HEAD'
1171
1172
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, multiplied by invscale and floor-divided by
    scale; return default when v is None or ''.

    If get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as None). """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1180
1181
def str_or_none(v, default=None):
    """ Convert v to a text string; return default when v is None """
    if v is None:
        return default
    return compat_str(v)
1184
1185
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: ',', '.' and '+' characters
    are dropped from int_str before conversion; None is passed through """
    if int_str is None:
        return None
    cleaned = re.sub(r'[,\.\+]', '', int_str)
    return int(cleaned)
1192
1193
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, multiplied by invscale and divided by scale;
    return default when v is None """
    if v is None:
        return default
    return float(v) * invscale / scale
1196
1197
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '12m 30s' or 'T30M38S' into a
    number of seconds; return None if s is None or is not recognized.

    The result is an int unless s has a fractional seconds part. """
    if s is None:
        return None

    m = re.match(
        r'''(?ix)T?
        (?:
            (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
            (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
        )?
        (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s.strip())
    if m is None:
        return None

    fields = m.groupdict()
    total = int(fields['secs'])
    for key, seconds_per_unit in (('mins', 60), ('hours', 60 * 60)):
        if fields[key]:
            total += int(fields[key]) * seconds_per_unit
    if fields['ms']:
        # The 'ms' group holds the fractional part including its dot
        total += float(fields['ms'])
    return total
1221
1222
def prepend_extension(filename, ext):
    """ Insert ext before the existing extension of filename,
    e.g. ('abc.ext', 'temp') gives 'abc.temp.ext' """
    parts = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(parts[0], ext, parts[1])
1226
1227
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary could not be started. """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    else:
        return exe
1236
1237
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    exe is run with args and version_re is searched in the first line of
    its combined stdout/stderr output; group 1 of the match is returned.
    If nothing matches, unrecognized is returned instead. """
    # NOTE: in the default version_re the '-' is placed first in the
    # character class so that it is a literal. Written as '._-a-zA-Z' it
    # forms the range '_'..'a' and leaves 'z' as a lone literal, so most
    # lower-case letters would not match.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    else:
        return unrecognized
1255
1256
class PagedList(object):
    """ Base for paged lists; getslice() is expected on subclasses """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1261
1262
class OnDemandPagedList(PagedList):
    """ Paged list whose pages are fetched one at a time by calling
    pagefunc(pagenum) while a slice is being collected """

    def __init__(self, pagefunc, pagesize):
        # pagefunc(pagenum) must return an iterable with the items of that
        # (0-based) page; pagesize is the number of items on a full page
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return a list of the items from index start up to end,
        fetching pages from the one containing start onwards.
        With end=None, pages are fetched until one is not full. """
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Item indices covered by this page: [firstid, nextfirstid)
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of start inside this page (0 unless start is on it)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of end inside this page (None unless end is on it)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
1304
1305
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known in advance """

    def __init__(self, pagefunc, pagecount, pagesize):
        # pagefunc(pagenum) must return an iterable with the items of that
        # (0-based) page; pagecount is the number of pages and pagesize the
        # number of items on a full page
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return a list of the items from index start up to end
        (or up to the last page if end is None) """
        res = []
        start_page = start // self._pagesize
        if end is None:
            end_page = self._pagecount
        else:
            # Never request pages beyond the known page count, even if
            # end points past the last item
            end_page = min(self._pagecount, end // self._pagesize + 1)
        # Items to drop from the front of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Items still wanted; None means no upper bound
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the rest of the request
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1333
1334
def uppercase_escape(s):
    """ Replace each backslash + 'U' + eight hex digits sequence in s by
    the result of decoding it with the unicode_escape codec """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1341
1342
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    # On Python 2, text strings are encoded as UTF-8 before quoting
    if on_python2 and isinstance(s, unicode):
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
1348
1349
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only these four components are escaped; the others are kept as parsed
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
1359
# struct_pack / struct_unpack: wrappers around struct.pack / struct.unpack
# that accept a text format spec on every supported Python version
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    struct_pack, struct_unpack = struct.pack, struct.unpack
1376
1377
def read_batch_urls(batch_fd):
    """ Read the URLs from the open batch file batch_fd and close it.

    Each line is decoded as UTF-8 if it is not already text, freed of a
    leading byte order mark and of surrounding whitespace. Empty lines and
    lines starting with '#', ';' or ']' are skipped.
    Returns the list of remaining lines. """
    # '\ufeff' is what the UTF-8 BOM looks like in decoded text; the
    # three-character form covers input in which each of the BOM bytes
    # ended up as a character of its own
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1392
1393
def urlencode_postdata(*args, **kargs):
    """ URL-encode the given arguments like urlencode does and return
    the result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1396
1397
# etree_iter(n): Element.iter where it exists, otherwise a findall-based
# replacement
etree_iter = getattr(xml.etree.ElementTree.Element, 'iter', None)
if etree_iter is None:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1402
1403
def parse_xml(s):
    """ Parse the XML document in the text string s, ignoring any doctype
    declaration, and return the resulting element """
    class _DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=_DoctypeIgnoringBuilder())
    kwargs = {}
    if sys.version_info >= (2, 7):
        kwargs['parser'] = parser
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)

    if sys.version_info >= (3, 0):
        return tree

    # Fix up XML parser in Python 2.x
    for n in etree_iter(tree):
        if n.text is not None and not isinstance(n.text, compat_str):
            n.text = n.text.decode('utf-8')
    return tree
1419
1420
# Numeric age limit for each US rating label; parse_age_limit falls back to
# this table for strings that are not plain ages
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
1428
1429
def parse_age_limit(s):
    """ Turn an age string such as '18' or '18+' into an int; other strings
    are looked up in US_RATINGS. Returns None for None and unknown values """
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    return US_RATINGS.get(s, None)
1435
1436
def strip_jsonp(code):
    """ Remove a JSONP wrapper of the form 'name(...);' (optionally followed
    by // comments) from code and return what was between the parentheses;
    code is returned unchanged if it does not have that form """
    jsonp_re = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_re, r'\1', code)
1440
1441
def js_to_json(code):
    """ Rewrite JavaScript object notation in code towards JSON:
    single-quoted strings and bare identifiers are put in double quotes,
    and a comma directly before a closing ']' is dropped """
    # How escapes inside a single-quoted string are rewritten once the
    # string is delimited by double quotes
    single_quote_escapes = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def quote_token(match):
        token = match.group(0)
        # JSON literals and double-quoted strings are kept as they are
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: single_quote_escapes[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', quote_token, code)
    return re.sub(r',(\s*\])', lambda m: m.group(1), converted)
1465
1466
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values:
    the returned function maps an id to its index in quality_ids,
    or to -1 if it is not in there """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1475
1476
# Default output template: a %-style format string with the named
# fields title, id and ext
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1478
1479
def limit_length(s, length):
    """ Add ellipses to overly long strings.

    Returns s unchanged if it has at most length characters, otherwise a
    string of at most length characters that ends in '...' whenever there
    is room for it. None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length < len(ELLIPSES):
        # No room for the ellipses: a negative slice bound would keep
        # almost all of s and make the result longer than requested
        return s[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES
1488
1489
def version_tuple(v):
    """ Split the dotted version string v into a list of ints """
    return list(map(int, v.split('.')))
1492
1493
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is lower than limit (both dotted numeric
    version strings). If version is empty or one of them cannot be parsed,
    the answer is the opposite of assume_new """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, minimum = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < minimum
1501
1502
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when this module was loaded by a zipimporter ...
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    # ... or when sys has a 'frozen' attribute
    return hasattr(sys, 'frozen')
1508
1509
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)