Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
Update changelog
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import calendar
5 import codecs
6 import contextlib
7 import ctypes
8 import datetime
9 import email.utils
10 import errno
11 import getpass
12 import gzip
13 import itertools
14 import io
15 import json
16 import locale
17 import math
18 import os
19 import pipes
20 import platform
21 import re
22 import ssl
23 import socket
24 import struct
25 import subprocess
26 import sys
27 import traceback
28 import xml.etree.ElementTree
29 import zlib
30
31 try:
32 import urllib.request as compat_urllib_request
33 except ImportError: # Python 2
34 import urllib2 as compat_urllib_request
35
36 try:
37 import urllib.error as compat_urllib_error
38 except ImportError: # Python 2
39 import urllib2 as compat_urllib_error
40
41 try:
42 import urllib.parse as compat_urllib_parse
43 except ImportError: # Python 2
44 import urllib as compat_urllib_parse
45
46 try:
47 from urllib.parse import urlparse as compat_urllib_parse_urlparse
48 except ImportError: # Python 2
49 from urlparse import urlparse as compat_urllib_parse_urlparse
50
51 try:
52 import urllib.parse as compat_urlparse
53 except ImportError: # Python 2
54 import urlparse as compat_urlparse
55
56 try:
57 import http.cookiejar as compat_cookiejar
58 except ImportError: # Python 2
59 import cookielib as compat_cookiejar
60
61 try:
62 import html.entities as compat_html_entities
63 except ImportError: # Python 2
64 import htmlentitydefs as compat_html_entities
65
66 try:
67 import html.parser as compat_html_parser
68 except ImportError: # Python 2
69 import HTMLParser as compat_html_parser
70
71 try:
72 import http.client as compat_http_client
73 except ImportError: # Python 2
74 import httplib as compat_http_client
75
76 try:
77 from urllib.error import HTTPError as compat_HTTPError
78 except ImportError: # Python 2
79 from urllib2 import HTTPError as compat_HTTPError
80
81 try:
82 from urllib.request import urlretrieve as compat_urlretrieve
83 except ImportError: # Python 2
84 from urllib import urlretrieve as compat_urlretrieve
85
86
# subprocess.DEVNULL only exists on newer Pythons; older ones get a writable
# handle on the null device instead.
try:
    from subprocess import DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a freshly opened, writable handle on os.devnull."""
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the subprocess.DEVNULL sentinel."""
        return DEVNULL
92
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Decode the %XX escapes in string and return the decoded text."""
        if string == '':
            return string
        pieces = string.split('%')
        if len(pieces) == 1:
            # No percent sign at all: nothing to decode.
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pending: contiguous run of percent-encoded bytes, already
        # converted from hex but not yet decoded to text
        pending = b''
        string = pieces[0]
        for piece in pieces[1:]:
            try:
                if not piece:
                    raise ValueError
                pending += piece[:2].decode('hex')
                tail = piece[2:]
                if not tail:
                    # This segment was just a single percent-encoded
                    # character. It may be part of a multi-byte sequence,
                    # so decoding is delayed (bytes stay in pending).
                    continue
            except ValueError:
                tail = '%' + piece
            # Plain characters follow: flush the pending byte run first.
            string += pending.decode(encoding, errors) + tail
            pending = b''
        if pending:
            # Flush whatever is left at the end of the input.
            string += pending.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of (name, value) pairs."""
        _coerce_result = unicode
        fields = []
        for amp_part in qs.split('&'):
            fields.extend(amp_part.split(';'))
        result = []
        for field in fields:
            if not field and not strict_parsing:
                continue
            nv = field.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                nv.append('')
            if len(nv[1]) or keep_blank_values:
                name = _unquote(
                    nv[0].replace('+', ' '), encoding=encoding, errors=errors)
                value = _unquote(
                    nv[1].replace('+', ' '), encoding=encoding, errors=errors)
                result.append((_coerce_result(name), _coerce_result(value)))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping names to value lists."""
        parsed_result = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            parsed_result.setdefault(name, []).append(value)
        return parsed_result
171
# Text type of the running interpreter: unicode on Python 2, str on Python 3.
try:
    unicode
except NameError:  # Python 3
    compat_str = str
else:  # Python 2
    compat_str = unicode
176
# Code point -> text character: unichr on Python 2, chr on Python 3.
try:
    unichr
except NameError:  # Python 3
    compat_chr = chr
else:  # Python 2
    compat_chr = unichr
181
182 try:
183 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
184 except ImportError: # Python 2.6
185 from xml.parsers.expat import ExpatError as compat_xml_parse_error
186
def compat_ord(c):
    """Return c unchanged if it is already an int, else ord(c).

    Indexing a bytes object yields ints on Python 3 but one-character
    strings on Python 2; this helper accepts both.
    """
    return c if type(c) is int else ord(c)
190
# The type of a compiled regular expression is not exposed under a stable
# public name, so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP headers added to every request (see YoutubeDLHandler).
std_headers = {
    'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset':
        'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding':
        'gzip, deflate',
    'Accept-Language':
        'en-us,en;q=0.5',
}
201
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.

    Falls back to 'UTF-8' when the locale cannot be queried or names an
    encoding that cannot actually encode text.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec: some systems report names Python does not know.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (locale errors vary by platform), but no longer
        # a bare except, so KeyboardInterrupt/SystemExit still propagate.
        pref = 'UTF-8'

    return pref
215
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s to standard output."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded in the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
223
# json.dump wants a byte stream on Python 2.x but a character stream on
# Python 3.x, so the file is opened differently on each.
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as json_file:
            json.dump(obj, json_file)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as json_file:
            json.dump(obj, json_file)
234
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] by filtering matches manually """
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the expression, so only accept
        # characters that cannot break out of the predicate.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
        predicate = u"[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
248
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{uri}tag' using ns_map.

    Steps without a prefix are left untouched.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
261
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities, decimal references (#47) and hexadecimal references
    (#x2F) are decoded. Anything else, including numeric references that
    do not denote a valid code point, is returned in its literal
    '&...;' form.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric reference. Hex digits a-f must be accepted (the previous
    # pattern 'x?\d+' turned '#x2F' into chr(0x2)), and the whole entity has
    # to match so trailing characters are never silently dropped.
    mobj = re.match(r'#(?:[xX]([0-9a-fA-F]+)|([0-9]+))$', entity)
    if mobj is not None:
        hex_digits, dec_digits = mobj.groups()
        try:
            if hex_digits is not None:
                return compat_chr(int(hex_digits, 16))
            return compat_chr(int(dec_digits, 10))
        except (ValueError, OverflowError):
            # Code point outside the supported range: keep the literal.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
286
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix


class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that remembers the document it was fed.

    Subclasses can reach the complete source through self.html, e.g. to cut
    a region out of it by parser positions.
    """

    # The constructor used to be spelled '__init', which is name-mangled and
    # never runs, so self.html did not exist until loads() was called.
    def __init__(self):
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        """Parse the complete document html in one go."""
        self.html = html
        self.feed(html)
        self.close()
297
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""

    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start_position, end_position] while parsing.
        self.result = None
        self.started = False
        # Open-tag counter per tag name, maintained once the target is found.
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Skip a line and resume parsing, unless too many errors occurred
        or the target element has already been entered."""
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        remaining_lines = self.html.split('\n')[self.getpos()[0]:]
        self.rawdata = '\n'.join(remaining_lines)  # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The element is complete once every tag of its kind is closed.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Any parser event following the opening tag marks the content start.
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped inner text of the isolated tag, or None."""
        if self.result is None or len(self.result) != 3:
            return None
        start_line, start_col = self.result[1][0], self.result[1][1]
        end_line, end_col = self.result[2][0], self.result[2][1]
        lines = self.html.split('\n')
        lines = lines[start_line - 1:end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            lines[-1] = lines[-1][:end_col - start_col]
        lines[-1] = lines[-1][:end_col]
        return '\n'.join(lines).strip()


# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    def _attrparser_parse_endtag(self, i):
        marker = "</scr'+'ipt>"
        if self.rawdata[i:].startswith(marker):
            return i + len(marker)
        return compat_html_parser.HTMLParser.parse_endtag(self, i)

    AttrParser.parse_endtag = _attrparser_parse_endtag
363
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is simply an attribute lookup on "id".
    return get_element_by_attribute(attribute="id", value=id, html=html)
367
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    attr_parser = AttrParser(attribute, value)
    try:
        attr_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep whatever was collected before the parse error.
        pass
    return attr_parser.get_result()
376
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """

    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag == 'meta':
            attributes = dict(attrs)
            if attributes.get('name') == self.name:
                # A later matching tag overrides an earlier one.
                self.result = attributes.get('content')

    def get_result(self):
        """Return the content attribute found, or None."""
        return self.result
397
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    meta_parser = MetaParser(name)
    try:
        meta_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: report whatever was found before the parse error.
        pass
    return meta_parser.get_result()
408
409
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Newlines are flattened, <br> and paragraph boundaries become newlines,
    all remaining tags are removed and HTML entities are decoded. None is
    passed through unchanged so that optional fields can be cleaned
    without a prior check.
    """
    if html is None:
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
421
422
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it replaces the
    characters that are forbidden on win32 in the last path component and
    tries once more. If that fails too, the exception is raised, like the
    standard open() function does.

    The filename u'-' selects standard output (its binary buffer where
    available).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final component is rewritten: the directory part has to
        # keep its separators (and drive colon) to stay a valid path.
        # Previously os.path.join() received a generator instead of the
        # path components, and the retry reopened the unmodified name.
        head, tail = os.path.split(filename)
        alt_tail = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail)
        if alt_tail == tail:
            raise
        alt_filename = os.path.join(head, alt_tail)
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
456
457
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when timestr cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
465
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped entirely.
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    result = u''.join([replace_insane(char) for char in s])
    if is_id:
        # IDs are kept as close to the original as possible.
        return result

    # Collapse runs of underscores and trim them from both ends.
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result if result else '_'
497
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of every element, in input
    order. Elements only need to support equality, not hashing.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
505
506
def unescapeHTML(s):
    """Replace the HTML entities in s by the characters they stand for.

    None is passed through; any other value must be a text string.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    # Every '&...;' run is handed to htmlentity_transform for decoding.
    return re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
514
515
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is handed to a subprocess call

    Returns s unchanged where Unicode file names can be used directly,
    otherwise s encoded to bytes (unencodable characters are dropped).
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up offer Unicode APIs.
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = (
        sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
    if unicode_windows:
        if not for_subprocess:
            # Pass u'' directly to use the Unicode APIs
            return s
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
542
543
def encodeArgument(s):
    """Encode the command line argument s for use in a subprocess call."""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
551
552
def decodeOption(optval):
    """Return the option value optval as text.

    Byte strings are decoded with the preferred encoding; None is passed
    through unchanged.
    """
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())
        assert isinstance(optval, compat_str)
    return optval
561
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that can hold the value is used.
    """
    # '>=' rather than '>': exactly one hour is '1:00:00' (not '60:00') and
    # exactly one minute is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
569
570
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Create the HTTPS handler used for all HTTPS requests.

    opts_no_check_certificate disables certificate verification when true.
    It is only honoured on Python >= 3.2; the legacy branch below never
    verifies certificates. Remaining keyword arguments are passed on to
    the handler constructor.

    The protocol is negotiated with PROTOCOL_SSLv23, which selects the
    best version both sides support. SSLv3 used to be hard-coded, but it
    is insecure and missing from current ssl builds, where referencing
    ssl.PROTOCOL_SSLv3 raises AttributeError.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
                except ssl.SSLError:
                    # Last resort for servers that only speak SSLv3, and
                    # only where the ssl module still offers it.
                    sslv3 = getattr(ssl, 'PROTOCOL_SSLv3', None)
                    if sslv3 is None:
                        raise
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=sslv3)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
604
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        # Errors raised while one of these exceptions is being handled are
        # never bugs in youtube-dl itself.
        expected_causes = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_causes:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as text, or None if there is none."""
        if self.traceback is not None:
            return u''.join(traceback.format_tb(self.traceback))
        return None
629
630
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
634
635
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
647
648
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
656
657
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # The message is kept as an attribute for callers that read .msg.
        self.msg = msg
666
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
670
671
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
679
680
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Byte counts: what was actually received and what the server announced.
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
695
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data as a raw deflate stream, or failing that as a
        zlib-wrapped one."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, also where its constructor does
        not take the status code."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    @staticmethod
    def _gunzip(content):
        """Return the decompressed bytes of the gzip stream content,
        tolerating up to 1023 bytes of trailing junk."""
        gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            return gz.read()
        except IOError as original_ioerror:
            # There may be junk at the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            for trim in range(1, 1024):
                try:
                    gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                    return gz.read()
                except IOError:
                    continue
            raise original_ioerror

    def http_request(self, req):
        # The standard headers override whatever the request carries.
        for header, value in std_headers.items():
            if header in req.headers:
                del req.headers[header]
            req.add_header(header, value)
        # Internal pseudo headers steer this handler and are never sent.
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            req.headers.pop('User-agent', None)
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            uncompressed = io.BytesIO(self._gunzip(resp.read()))
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response
776
777
def parse_iso8601(date_str):
    """ Return a UNIX timestamp from the given date

    date_str has the form YYYY-MM-DDTHH:MM:SS, optionally followed by 'Z'
    or a numeric UTC offset (+HH:MM or +HHMM). None yields None.
    """

    if date_str is None:
        return None

    tz_match = re.search(
        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
        date_str)
    utc_offset = datetime.timedelta()
    if tz_match:
        # Cut the timezone designator off the date itself.
        date_str = date_str[:-len(tz_match.group(0))]
        if tz_match.group('sign'):
            direction = 1 if tz_match.group('sign') == '+' else -1
            utc_offset = datetime.timedelta(
                hours=direction * int(tz_match.group('hours')),
                minutes=direction * int(tz_match.group('minutes')))

    utc_time = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - utc_offset
    return calendar.timegm(utc_time.timetuple())
801
802
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Returns None if date_str is None or matches none of the known formats.
    """

    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    format_expressions = (
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    )
    upload_date = None
    # Every format is tried; if several match, the last one wins.
    for fmt in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            continue
    if upload_date is None:
        # Last resort: RFC 2822 style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
843
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url.

    Returns the text after the last dot of the URL without its query
    string if it is purely alphanumeric, else default_ext.
    """
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
850
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return stem + u'.' + sub_lang + u'.' + sub_format
853
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Relative months and years are approximated as 30 and 365 days.
    Raises ValueError if date_str has neither format.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a plain literal is an invalid escape sequence,
    # which newer Pythons warn about.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keyword names ('days', 'weeks').
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
879
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings in any other format are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
888
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest / latest representable
        date. Raises ValueError if start lies after end."""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
914
915
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding.
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
924
925
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The string s is written through the Win32 WriteConsoleW call when out
    refers to file descriptor 1 or 2 and that descriptor is attached to a
    console. In every other case nothing is written and False is returned,
    so the caller has to write the string itself.

    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> identifier passed to GetStdHandle
    # (presumably STD_OUTPUT_HANDLE / STD_ERROR_HANDLE; see the Win32 docs).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Bind the kernel32 functions used below with explicit prototypes.
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call.
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A console handle is a character device (ignoring the remote flag)
        # for which GetConsoleMode succeeds.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters per call and stop before the next
        # non-BMP character; count == 0 means s starts with one.
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            # It takes two units in the output but is one character of s.
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
996
997
def write_string(s, out=None, encoding=None):
    """Write the text string s to the stream out (default: sys.stderr).

    encoding, if given, overrides the encoding used for byte-oriented
    streams. The stream is flushed after writing.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows, first try to write directly to the console.
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream on top of a binary one: encode and bypass the wrapper.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
1018
1019
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values.

    An empty (falsy) input yields an empty list.
    """
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3 but 1-char strings on Python 2
    elements_are_ints = isinstance(bs[0], int)
    return list(bs) if elements_are_ints else [ord(ch) for ch in bs]
1027
1028
def intlist_to_bytes(xs):
    """Convert a list of integer byte values into a byte string.

    An empty (falsy) input yields b''.
    """
    if not xs:
        return b''
    if not isinstance(chr(0), bytes):
        # Python 3: chr() returns text, bytes() accepts an iterable of ints
        return bytes(xs)
    # Python 2: chr() already returns a one-byte str
    return ''.join([chr(value) for value in xs])
1036
1037
def get_cachedir(params=None):
    """Return the cache directory to use.

    params is an optional dict; its 'cachedir' entry, when present, is
    returned as-is.  Otherwise the result is the 'youtube-dl' directory
    below $XDG_CACHE_HOME, falling back to ~/.cache when that variable is
    unset or empty (an empty value must be treated as unset, otherwise the
    cache would end up in a path relative to the working directory).
    """
    if params is None:
        params = {}
    cache_root = (
        os.environ.get('XDG_CACHE_HOME')
        or os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
1042
1043
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) using LockFileEx/UnlockFileEx on Windows and fcntl.lockf
# everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        """ctypes layout of the structure passed as the Overlapped argument."""
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL

    # Low/high DWORDs of the byte count handed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raise OSError if LockFileEx fails."""
        ov = OVERLAPPED()
        ov.Offset = 0
        ov.OffsetHigh = 0
        ov.hEvent = 0
        # The pointer is stored on the file object so that _unlock_file
        # can pass the very same structure back.
        f._lock_file_overlapped_p = ctypes.pointer(ov)
        os_handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is passed as dwFlags for an exclusive lock, 0x0 otherwise
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            os_handle, flags, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock a file locked by _lock_file; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        os_handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            os_handle, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Lock the open file f (exclusive or shared) with fcntl.lockf."""
        operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
        fcntl.lockf(f, operation)

    def _unlock_file(f):
        """Release the fcntl.lockf lock held on f."""
        fcntl.lockf(f, fcntl.LOCK_UN)
1107
1108
class locked_file(object):
    """Context manager around a file that is locked while in use.

    The file is opened immediately by the constructor (modes 'r', 'a' and
    'w' only).  Entering the context takes a shared lock for 'r' and an
    exclusive lock otherwise; leaving it unlocks and closes the file.
    Iteration, read() and write() are forwarded to the underlying file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The Windows implementation of _lock_file raises OSError, which
            # is not an IOError on Python 2; catch both so the already opened
            # file never leaks when locking fails.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Close the file even if unlocking failed
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
1138
1139
def shell_quote(args):
    """Return the arguments joined into one shell-escaped command string.

    args is an iterable of text or byte strings; byte strings (such as
    filenames produced by 'encodeFilename') are decoded with the filesystem
    encoding (utf-8 if that is unknown) before being quoted.
    """
    # pipes.quote is undocumented and the pipes module is deprecated;
    # shlex.quote is the supported equivalent where it exists.
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'

    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(quote(a))
    return u' '.join(quoted_args)
1151
1152
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yield the last evaluated element
    (the first element e for which pred(e) is false) """
    for item in seq:
        yield item
        if pred(item):
            continue
        break
1160
1161
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is serialized as JSON, URL-encoded under the key
    '__youtubedl_smuggle' and appended to url after a '#'.
    """
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    fragment = compat_urllib_parse.urlencode(payload)
    return url + u'#' + fragment
1168
1169
def unsmuggle_url(smug_url, default=None):
    """Split a URL built by smuggle_url into (url, data).

    URLs without the '#__youtubedl_smuggle' marker are returned unchanged
    together with default.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    # Everything after the last '#' is the URL-encoded payload
    plain_url, _, encoded = smug_url.rpartition(u'#')
    fields = compat_parse_qs(encoded)
    return plain_url, json.loads(fields[u'__youtubedl_smuggle'][0])
1177
1178
def format_bytes(bytes):
    """Format a byte count as a string with a binary unit suffix.

    bytes may be a number, a numeric string, or None (which gives u'N/A').
    The value is scaled by the largest power of 1024 not exceeding it and
    printed with two decimals, e.g. u'1.50KiB'.  Values below one byte are
    reported in B and values beyond the largest known unit in YiB.
    Negative values raise ValueError (from math.log).
    """
    if bytes is None:
        return u'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = [
        u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available units: a negative exponent (value < 1/1024)
        # would silently index from the end of the list and a too large one
        # would raise IndexError.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffixes[exponent])
1191
1192
def str_to_int(int_str):
    """Parse an integer from a string, ignoring ',' and '.' separators."""
    separators = re.compile(r'[,\.]')
    return int(separators.sub(u'', int_str))
1196
1197
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable is used when it holds an integer;
    otherwise the second field of the output of ``stty size`` is tried.
    Any failure of that best-effort probe results in None.
    """
    columns = os.environ.get('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            pass  # Unusable COLUMNS value, fall back to asking stty

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Deliberately best-effort (stty missing, no tty, odd output), but
        # do not swallow KeyboardInterrupt/SystemExit like a bare except.
        return None
1212
1213
def month_by_name(name):
    """ Return the number (1-12) of a month given its English name,
    independently of the locale; None if the name is not recognized """

    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    for number, month in enumerate(ENGLISH_NAMES, 1):
        if month == name:
            return number
    return None
1224
1225
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity by '&amp;'

    Left untouched are the predefined entities (amp, lt, gt, apos, quot)
    and numeric character references of up to four digits.
    """
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub(u'&amp;', xml_str)
1232
1233
def setproctitle(title):
    """Set the name of the current process via prctl, where available.

    title must be a text string.  Nothing happens (and nothing is raised)
    when libc.so.6 cannot be loaded or does not provide prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title and NUL-terminated.  A buffer of exactly len(title_bytes)
    # has no terminator, so prctl could read past its end.
    buf = ctypes.create_string_buffer(title_bytes)
    PR_SET_NAME = 15  # prctl option number, see prctl(2)
    try:
        libc.prctl(PR_SET_NAME, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
1247
1248
def remove_start(s, start):
    """Return s without the prefix start; s unchanged if it lacks it."""
    return s[len(start):] if s.startswith(start) else s
1253
1254
def url_basename(url):
    """Return the last path component of url, ignoring surrounding '/'."""
    parsed = compat_urlparse.urlparse(url)
    trimmed_path = parsed.path.strip(u'/')
    return trimmed_path.rpartition(u'/')[2]
1258
1259
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always reports HEAD as its HTTP method."""

    def get_method(self):
        # Overrides Request.get_method with a constant
        return "HEAD"
1263
1264
def int_or_none(v, scale=1, default=None, get_attr=None):
    """Return int(v) // scale, or default when the value is None.

    With get_attr set, the attribute of that name is read from v first
    (a missing attribute counts as None).
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None:
        return default
    return int(v) // scale
1270
1271
def float_or_none(v, scale=1, default=None):
    """Return float(v) / scale, or default when v is None."""
    if v is None:
        return default
    return float(v) / scale
1274
1275
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Accepted are seconds optionally preceded by minutes and hours, using
    ':' or the letters h/m/s as separators (e.g. '1:02:03', '3m4s', '12');
    a trailing ':digits' group is matched but ignored.  Returns None for
    None or for strings that do not match.
    """
    if s is None:
        return None

    match = re.match(
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
    if match is None:
        return None

    fields = match.groupdict()
    total = int(fields['secs'])
    for unit, seconds_per_unit in (('mins', 60), ('hours', 60 * 60)):
        if fields[unit]:
            total += int(fields[unit]) * seconds_per_unit
    return total
1290
1291
def prepend_extension(filename, ext):
    """Insert ext in front of the real extension of filename.

    E.g. ('abc.ext', 'temp') gives 'abc.temp.ext'.
    """
    stem, original_ext = os.path.splitext(filename)
    template = u'{0}.{1}{2}'
    return template.format(stem, ext, original_ext)
1295
1296
def check_executable(exe, args=[]):
    """ Check whether the given binary can be started and return its name,
    or False if launching it raises OSError.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
1305
1306
class PagedList(object):
    """List-like view over results that are fetched page by page.

    pagefunc(pagenum) must return an iterable with the entries of the
    zero-based page pagenum; pagesize is the number of entries on a full
    page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        Pages are requested starting with the one containing start and
        fetching stops at the first short page or once end is reached.
        """
        collected = []
        size = self._pagesize
        for pagenum in itertools.count(start // size):
            page_first = pagenum * size
            page_limit = page_first + size
            if start >= page_limit:
                continue

            entries = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry within this page
            if page_first <= start < page_limit:
                lower = start % size
            else:
                lower = 0

            # Offset just past the last wanted entry, if it is on this page
            if end is not None and page_first <= end <= page_limit:
                upper = ((end - 1) % size) + 1
            else:
                upper = None

            if lower != 0 or upper is not None:
                entries = entries[lower:upper]
            collected.extend(entries)

            # A little optimization - if the current page is not "full",
            # i.e. does not contain pagesize entries, we can assume it is
            # the last one - there is nothing on further pages, so there
            # is no need to query again.
            if len(entries) + lower < size:
                break

            # If we got the whole page, but the next page is not
            # interesting, break out early as well
            if end == page_limit:
                break
        return collected
1352
1353
def uppercase_escape(s):
    """Expand every \\UXXXXXXXX escape (8 hex digits) found in s."""
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        decoded, _ = decode_escape(match.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1360
# struct_pack / struct_unpack: struct.pack / struct.unpack that also accept
# a text format specification on interpreters where struct rejects it.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        byte_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(byte_spec, *args)

    def struct_unpack(spec, *args):
        byte_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(byte_spec, *args)
else:
    # Text specs work natively, use the stdlib functions directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
1377
1378
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file object and close it.

    batch_fd is iterated line by line; byte lines are decoded as UTF-8.
    A leading byte order mark and surrounding whitespace are removed, and
    empty lines as well as lines starting with '#', ';' or ']' are dropped.
    Returns the list of remaining URLs.
    """
    # A UTF-8 BOM shows up as U+FEFF when the line was decoded as UTF-8
    # (including the decoding done below), or as the three characters
    # \xef\xbb\xbf when its bytes were mapped to characters one-to-one.
    BOMS = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1393
1394
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments like urlencode() and return ASCII bytes."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1397
1398
def parse_xml(s):
    """Parse the XML text s and return its root element.

    Doctype declarations are ignored.  The text is encoded as UTF-8 before
    being handed to ElementTree.
    """
    class DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    custom_parser = xml.etree.ElementTree.XMLParser(
        target=DoctypeIgnoringBuilder())
    # The parser argument is only passed on Python 2.7 and later
    if sys.version_info >= (2, 7):
        extra = {'parser': custom_parser}
    else:
        extra = {}
    return xml.etree.ElementTree.XML(s.encode('utf-8'), **extra)
1407
1408
# compat_getpass: getpass.getpass, except on Python 2 under Windows where a
# text prompt is first encoded with the preferred encoding.
if sys.platform == 'win32' and sys.version_info < (3, 0):
    def compat_getpass(prompt, *args, **kwargs):
        encoded_prompt = (
            prompt.encode(preferredencoding())
            if isinstance(prompt, compat_str) else prompt)
        return getpass.getpass(encoded_prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
1416
1417
# Numeric value for each US rating label
# NOTE(review): presumably the minimum viewer age - confirm with the users
# of this table.
US_RATINGS = dict([
    ('G', 0),
    ('PG', 10),
    ('PG-13', 13),
    ('R', 16),
    ('NC', 18),
])
1425
1426
def strip_jsonp(code):
    """Remove a JSONP wrapper 'callback(...);' and return its argument.

    Text that does not have this shape is returned unchanged.
    """
    jsonp_wrapper = re.compile(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$')
    return jsonp_wrapper.sub(r'\1', code)
1429
1430
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function that maps a quality id to its index in quality_ids,
    or to -1 if the id is not listed.
    """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
1439
1440
# %-style template with the named fields title, id and ext
# NOTE(review): presumably the default output filename template - confirm
DEFAULT_OUTTMPL = (
    '%(title)s-%(id)s.%(ext)s')
1442
# subprocess_check_output: subprocess.check_output, with a replacement for
# interpreters that do not provide it.
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Arguments are passed to subprocess.Popen (stdout is captured, so
        it must not be given; 'input' is not supported).  Raises
        subprocess.CalledProcessError, carrying the captured output in its
        ``output`` attribute, if the command exits with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Interpreters lacking check_output also lack Popen.args and
            # the output= keyword of CalledProcessError, so recover the
            # command from our own arguments and attach the output by hand.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output