# Snapshot of youtube_dl/utils.py (commit e40b367c255719046bf2d5dd2fd63bc6bb2e4d8a)
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import calendar
5 import codecs
6 import contextlib
7 import ctypes
8 import datetime
9 import email.utils
10 import errno
11 import getpass
12 import gzip
13 import itertools
14 import io
15 import json
16 import locale
17 import math
18 import os
19 import pipes
20 import platform
21 import re
22 import ssl
23 import socket
24 import struct
25 import subprocess
26 import sys
27 import traceback
28 import xml.etree.ElementTree
29 import zlib
30
31 try:
32 import urllib.request as compat_urllib_request
33 except ImportError: # Python 2
34 import urllib2 as compat_urllib_request
35
36 try:
37 import urllib.error as compat_urllib_error
38 except ImportError: # Python 2
39 import urllib2 as compat_urllib_error
40
41 try:
42 import urllib.parse as compat_urllib_parse
43 except ImportError: # Python 2
44 import urllib as compat_urllib_parse
45
46 try:
47 from urllib.parse import urlparse as compat_urllib_parse_urlparse
48 except ImportError: # Python 2
49 from urlparse import urlparse as compat_urllib_parse_urlparse
50
51 try:
52 import urllib.parse as compat_urlparse
53 except ImportError: # Python 2
54 import urlparse as compat_urlparse
55
56 try:
57 import http.cookiejar as compat_cookiejar
58 except ImportError: # Python 2
59 import cookielib as compat_cookiejar
60
61 try:
62 import html.entities as compat_html_entities
63 except ImportError: # Python 2
64 import htmlentitydefs as compat_html_entities
65
66 try:
67 import html.parser as compat_html_parser
68 except ImportError: # Python 2
69 import HTMLParser as compat_html_parser
70
71 try:
72 import http.client as compat_http_client
73 except ImportError: # Python 2
74 import httplib as compat_http_client
75
76 try:
77 from urllib.error import HTTPError as compat_HTTPError
78 except ImportError: # Python 2
79 from urllib2 import HTTPError as compat_HTTPError
80
81 try:
82 from urllib.request import urlretrieve as compat_urlretrieve
83 except ImportError: # Python 2
84 from urllib import urlretrieve as compat_urlretrieve
85
86
87 try:
88 from subprocess import DEVNULL
89 compat_subprocess_get_DEVNULL = lambda: DEVNULL
90 except ImportError:
91 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
92
try:
    from urllib.parse import unquote as compat_urllib_parse_unquote
except ImportError:
    # Python 2: backport of Python 3's urllib.parse.unquote, which takes
    # encoding/errors arguments and correctly reassembles multi-byte
    # percent-encoded sequences before decoding them.
    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            # No '%' at all: nothing to unquote
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # Py2-only: str.decode('hex') turns the two hex digits into a byte
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                # Not a valid %XX escape: keep the literal '%'
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string
130
131
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        # Split a query string into a list of (name, value) pairs.
        # Both '&' and ';' separate pairs, mirroring cpython 3's behaviour.
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                # '+' means space in query strings; unquote after splitting
                name = nv[0].replace('+', ' ')
                name = compat_urllib_parse_unquote(
                    name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = compat_urllib_parse_unquote(
                    value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        # Group the pairs from _parse_qsl into a dict mapping each name to
        # the list of its values (duplicate parameters are preserved).
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
178
179 try:
180 compat_str = unicode # Python 2
181 except NameError:
182 compat_str = str
183
184 try:
185 compat_chr = unichr # Python 2
186 except NameError:
187 compat_chr = chr
188
189 try:
190 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
191 except ImportError: # Python 2.6
192 from xml.parsers.expat import ExpatError as compat_xml_parse_error
193
def compat_ord(c):
    """Return the integer value of c: ints pass through unchanged,
    single characters go through ord() (Py2 bytes indexing yields str)."""
    return c if type(c) is int else ord(c)
197
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

# Default HTTP headers added to every request by YoutubeDLHandler below.
# The User-Agent also mentions Chrome to appease servers that sniff for it.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
208
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Verify the reported encoding is actually usable before trusting it
        u'TEST'.encode(pref)
    except Exception:
        # Previously a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; an unusable locale falls back to UTF-8
        pref = 'UTF-8'

    return pref
222
# Printing unicode safely: Python 2 must encode explicitly (unmappable
# characters become XML character references); Python 3 prints str directly.
if sys.version_info < (3,0):
    def compat_print(s):
        # Print a unicode string on Python 2 without UnicodeEncodeError
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        assert type(s) == type(u'')
        print(s)
230
# In Python 2.x, json.dump expects a bytestream.
# In Python 3.x, it writes to a character stream
if sys.version_info < (3,0):
    def write_json_file(obj, fn):
        # Serialize obj as JSON to the file named fn (binary mode on Py2)
        with open(fn, 'wb') as f:
            json.dump(obj, f)
else:
    def write_json_file(obj, fn):
        # Text mode with explicit UTF-8 on Py3
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)
241
# ElementTree only understands attribute predicates in find() on 2.7+;
# older versions fall back to a linear scan over all matches.
if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the expression below, so
        # restrict them to characters that cannot break the xpath syntax
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        # Return the first element matching xpath whose attribute key == val
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None
255
256 # On python2.6 the xml.etree.ElementTree.Element methods don't support
257 # the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' components of an xpath into '{uri}tag' form
    using ns_map, for ElementTree versions without namespace support."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            # No namespace prefix: keep the component as-is
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
268
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric character reference: decimal (&#65;) or hexadecimal (&#x41;).
    # The previous pattern u'#(x?\\d+)' used \d after the 'x', so hex
    # entities containing a-f (e.g. &#x2F;) were never decoded.
    mobj = re.match(u'(?u)#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr  # '0x...' form accepted by int()
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
293
294 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser subclass that remembers the raw document it parsed.

    Bugfix: the constructor was named ``__init`` (missing trailing
    underscores), so it never ran as a constructor and ``self.html`` was
    only ever set by loads(); renamed to ``__init__``.
    """
    def __init__(self):
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None  # raw document, (re)assigned by loads()

    def loads(self, html):
        """Parse the complete document *html* and close the parser."""
        self.html = html
        self.feed(html)
        self.close()
304
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        # The attribute/value pair being searched for (e.g. id="player")
        self.attribute = attribute
        self.value = value
        self.result = None            # becomes [tag, startpos, endpos]
        self.started = False          # True while inside the wanted tag
        self.depth = {}               # per-tag nesting counters while started
        self.watch_startpos = False   # next parser event marks content start
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        # Error recovery: drop the offending line from the parser's private
        # rawdata buffer and resume via the (private) goahead() machinery.
        # Give up after 10 errors or once the target tag has been entered.
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # Another start tag ends the "watching for content start" state
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The matching close of the tag we isolated: record the end position
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Any event following the opening tag pins the content start position
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        # Returns the text between the opening and closing tag, or None when
        # the tag was never found or never closed (result lacks 3 entries).
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end on the same line: both column offsets apply to it.
            # (The unconditional slice below is then a no-op, since the string
            # is already shorter than result[2][1].)
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# Python < 2.7.3's HTMLParser.parse_endtag chokes on the obfuscated
# "</scr'+'ipt>" sequence found in some pages; skip over it manually.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
370
def get_element_by_id(id, html):
    """Return the inner content of the tag whose id attribute equals *id*
    in the given HTML document (None if absent)."""
    return get_element_by_attribute("id", id, html)
374
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag whose *attribute* equals *value*
    in the given HTML document (None if no such tag is found)."""
    finder = AttrParser(attribute, value)
    try:
        finder.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep whatever was isolated before the parse error
        pass
    return finder.get_result()
383
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        # Only <meta> tags are of interest; a later match overwrites earlier ones
        if tag != 'meta':
            return
        attr_map = dict(attrs)
        if attr_map.get('name') == self.name:
            self.result = attr_map.get('content')

    def get_result(self):
        """Return the content of the last matching meta tag (None if none)."""
        return self.result
404
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    extractor = MetaParser(name)
    try:
        extractor.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep whatever was found before the parse error
        pass
    return extractor.get_result()
415
416
def clean_html(html):
    """Clean an HTML snippet into a readable plain-text string:
    <br> and paragraph breaks become newlines, tags are stripped,
    and HTML entities are expanded."""
    text = html.replace('\n', ' ')
    # Turn explicit line/paragraph breaks into real newlines
    text = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip all remaining tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    text = unescapeHTML(text)
    return text.strip()
428
429
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Bugfix: os.path.join needs the parts as separate arguments;
        # it was previously handed a single generator object, which it
        # returned unjoined.
        alt_filename = os.path.join(*[
            re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
            for path_part in os.path.split(filename)
        ])
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller.
            # Bugfix: open the sanitized alt_filename (previously the
            # original, already-failing filename was reopened).
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
463
464
def timeconvert(timestr):
    """Convert an RFC 2822 date string into a UNIX timestamp
    (None when the string cannot be parsed)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
472
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def _map_char(ch):
        code = ord(ch)
        # Control characters and '?' are dropped outright
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return ch

    result = u''.join(_map_char(ch) for ch in s)
    if not is_id:
        # Collapse runs of underscores and trim them from the ends
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
504
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, keeping first-seen
    order. (Uses list membership so unhashable elements also work.) """
    unique = []
    for item in iterable:
        if item not in unique:
            unique.append(item)
    return unique
512
513
def unescapeHTML(s):
    """Expand HTML entities (named and numeric) in s; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str
    return re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
521
522
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess On Windows/Py2, encode with the locale encoding
           instead of returning unicode, for use in subprocess arguments
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        if not for_subprocess:
            return s
        else:
            # For subprocess calls, encode with locale encoding
            # Refer to http://stackoverflow.com/a/9951851/35070
            encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
549
550
def encodeArgument(s):
    """Encode a command-line argument like a filename destined for a
    subprocess call (locale encoding on Py2, unchanged on Py3)."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
558
559
def decodeOption(optval):
    """Decode a command-line option value to unicode; None passes through."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())
    assert isinstance(optval, compat_str)
    return optval
568
def formatSeconds(secs):
    """Format a duration in seconds as H:MM:SS (above an hour),
    M:SS (above a minute) or a bare seconds count."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    if secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    return '%d' % secs
576
577
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Build an HTTPS handler that prefers SSLv3 (falling back to SSLv23).

    On Python < 3.2 a custom connection class is used (no certificate
    verification is performed on that path); on newer Pythons an SSLContext
    is configured, honoring opts_no_check_certificate.
    NOTE(review): PROTOCOL_SSLv3 is insecure and absent from modern ssl
    builds; kept as-is since this snapshot targets old interpreters.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                # Try SSLv3 first, then fall back to the permissive SSLv23
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
611
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Errors raised while a network failure or unavailable video is being
        # handled are always "expected" (not a youtube-dl bug)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if not expected:
            # Unexpected errors get bug-reporting instructions appended
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info() # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as a single string (None if absent)
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
636
637
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
641
642
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Keep the originating exception triple around for verbose output
        self.exc_info = exc_info
654
655
class SameFileError(Exception):
    """Same File exception.

    Raised by FileDownloader objects when multiple downloads would have to
    be written to the same file on disk.
    """
663
664
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        # Bugfix: also pass msg to Exception, so str()/repr() and default
        # traceback output show the message (previously str(exc) was empty)
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
673
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
677
678
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available
    for that video.
    """
686
687
class ContentTooShortError(Exception):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller than
    what the server announced first, indicating the connection was probably
    interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
702
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        # Try raw deflate first, then zlib-wrapped deflate
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        # Compat shim: older addinfourl has no getcode()/code parameter,
        # so set the code attribute manually there
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        # Force our std_headers, replacing any caller-provided duplicates
        for h,v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        # Internal marker header: strip compression support for this request
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        # Internal marker header: per-request User-Agent override
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes stripped off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS requests/responses get the same treatment
    https_request = http_request
    https_response = http_response
783
784
def parse_iso8601(date_str, delimiter='T'):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # Look for a trailing 'Z' or a numeric UTC offset like +02:00 / -0500
    tz_m = re.search(
        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
        date_str)
    offset = datetime.timedelta()
    if tz_m:
        date_str = date_str[:-len(tz_m.group(0))]
        if tz_m.group('sign'):
            direction = 1 if tz_m.group('sign') == '+' else -1
            offset = datetime.timedelta(
                hours=direction * int(tz_m.group('hours')),
                minutes=direction * int(tz_m.group('minutes')))
    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    # Subtract the offset to normalize to UTC before taking the timestamp
    dt = datetime.datetime.strptime(date_str, fmt) - offset
    return calendar.timegm(dt.timetuple())
808
809
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas are noise for every supported format
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so strip offsets
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)

    known_formats = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    upload_date = None
    for fmt in known_formats:
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
853
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a URL: the part after the last dot of
    the path (query string ignored), or default_ext when implausible."""
    if url is None:
        return default_ext
    candidate = url.partition(u'?')[0].rpartition(u'.')[2]
    # Only alphanumeric candidates look like real extensions
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
862
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name: media base name + language + format."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
865
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # Rough approximations: a month is 30 days, a year 365 days
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta wants plural keyword arguments (days=, weeks=, ...)
        return today + datetime.timedelta(**{unit + 's': amount})
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
891
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    anything else is returned unchanged."""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(m.groups()) if m is not None else date_str
900
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the extreme representable dates
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
926
927
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Python 2 may hand back bytes; normalize to text
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name
936
937
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070
    # Writes via WriteConsoleW so Unicode shows correctly on Windows consoles.

    import ctypes
    import ctypes.wintypes

    # Map file descriptors to Win32 std-handle ids
    # (-11 = STD_OUTPUT_HANDLE, -12 = STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is only a real console if it is a character device
        # and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        # (len(s) if there is none)
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP characters in chunks of up to 1024; a non-BMP character
        # (count == 0) is written on its own as a 2-unit surrogate pair
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count: # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
1008
1009
def write_string(s, out=None, encoding=None):
    """Write the unicode string s to out (default sys.stderr), choosing the
    appropriate encoding strategy for the stream and platform."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows consoles, prefer the native wide-character API
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying byte buffer: encode explicitly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
1030
1031
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing a str yields 1-char strings
    return list(map(ord, bs))
1039
1040
def intlist_to_bytes(xs):
    """Convert a list of integer byte values back into a byte string."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2: bytes is str, build via chr()
        return b''.join(chr(x) for x in xs)
    # Python 3: bytes() accepts an iterable of ints directly
    return bytes(xs)
1048
1049
def get_cachedir(params=None):
    """Return the cache directory to use.

    Honours params['cachedir'] when present; otherwise falls back to
    $XDG_CACHE_HOME/youtube-dl (defaulting to ~/.cache/youtube-dl).
    """
    if params is None:
        # Avoid a mutable default argument ({} shared across calls).
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
1054
1055
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) using LockFileEx/UnlockFileEx on Windows and fcntl
# advisory locks everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Win32 OVERLAPPED structure; LockFileEx/UnlockFileEx use it to
    # specify the starting offset of the byte range being locked.
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    # Declare argtypes/restype so ctypes marshals arguments correctly
    # instead of guessing from the Python values passed in.
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the largest possible byte range so the whole file is covered.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the pointer alive on the file object: the OVERLAPPED must
        # outlive the lock so the matching unlock can reference it.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        # fcntl advisory lock; blocks until the lock is acquired.
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.lockf(f, fcntl.LOCK_UN)
1119
1120
class locked_file(object):
    """Context manager wrapping an open file with an advisory lock.

    Takes a shared lock for mode 'r' and an exclusive lock for 'a'/'w';
    the lock is released and the file closed when the block exits.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Writes and appends need exclusivity; plain reads share.
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Could not acquire the lock: do not leak the open handle.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def read(self, *args):
        return self.f.read(*args)

    def write(self, *args):
        return self.f.write(*args)

    def __iter__(self):
        return iter(self.f)
1150
1151
def shell_quote(args):
    """Return *args* joined into a single shell-safe command line."""
    # Filenames may arrive as bytes (via encodeFilename); decode them
    # with the filesystem encoding before quoting.
    fs_encoding = sys.getfilesystemencoding() or 'utf-8'
    quoted = []
    for arg in args:
        if isinstance(arg, bytes):
            arg = arg.decode(fs_encoding)
        quoted.append(pipes.quote(arg))
    return u' '.join(quoted)
1163
1164
def takewhile_inclusive(pred, seq):
    """Like itertools.takewhile, but also yield the first element for
    which pred(e) is false (the element that ends the run)."""
    for elem in seq:
        yield elem
        if not pred(elem):
            break
1172
1173
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The data is JSON-encoded and stored URL-encoded in the fragment,
    # which servers never see; unsmuggle_url() reverses this.
    payload = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return u'%s#%s' % (url, payload)
1180
1181
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): return (url, data), or (url, default)
    when no data was smuggled into the URL fragment."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
1189
1190
def format_bytes(bytes):
    """Render a byte count human-readably, e.g. 2048 -> '2.00KiB'."""
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # Pick the largest 1024-based unit that keeps the mantissa >= 1.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
1203
1204
def get_term_width():
    """Return the terminal width in columns, or None if it cannot be
    determined.

    Honours the COLUMNS environment variable first, then falls back to
    asking stty(1).
    """
    columns = os.environ.get('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best-effort: stty may be missing or stdin may not be a tty.
        # A bare `except:` here would also swallow KeyboardInterrupt and
        # SystemExit, so catch Exception instead.
        pass
    return None
1219
1220
def month_by_name(name):
    """Return the 1-based month number for an English month name
    (locale-independent), or None if the name is unknown."""
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    if name not in ENGLISH_NAMES:
        return None
    return ENGLISH_NAMES.index(name) + 1
1231
1232
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;', leaving existing
    entity and character references untouched."""
    # Negative lookahead skips '&' that already starts an entity
    # (&amp; &lt; ...) or a numeric character reference (&#x..; &#..;).
    unescaped_amp = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return unescaped_amp.sub(u'&amp;', xml_str)
1239
1240
def setproctitle(title):
    """Set the process name (as shown by ps/top) via libc prctl().

    Silently does nothing when libc.so.6 cannot be loaded (non-glibc
    systems) or when the libc has no prctl symbol.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Pass the title through a C buffer of exactly the encoded length.
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME (see <linux/prctl.h>)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
1254
1255
def remove_start(s, start):
    """Return s with the prefix *start* removed, if present."""
    if not s.startswith(start):
        return s
    return s[len(start):]
1260
1261
def url_basename(url):
    """Return the last path component of *url* (no query/fragment),
    or an empty string when the path is empty."""
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').rpartition(u'/')[2]
1265
1266
class HEADRequest(compat_urllib_request.Request):
    # A urllib Request that issues an HTTP HEAD instead of the default
    # GET/POST, for probing headers without downloading a body.
    def get_method(self):
        return "HEAD"
1270
1271
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int (optionally reading attribute *get_attr* first),
    scaled by invscale/scale; return *default* when v is None/missing."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None:
        return default
    return int(v) * invscale // scale
1277
1278
def str_to_int(int_str):
    """Parse an int from a string, ignoring ',' and '.' thousands
    separators; None passes through."""
    if int_str is None:
        return None
    return int(re.sub(r'[,\.]', u'', int_str))
1284
1285
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float scaled by invscale/scale; return *default*
    when v is None."""
    if v is None:
        return default
    return float(v) * invscale / scale
1288
1289
def parse_duration(s):
    """Parse a duration like '1:23:45', '3h11m53s' or '45' into seconds;
    return None for None or unparseable input."""
    if s is None:
        return None

    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
    if m is None:
        return None
    duration = int(m.group('secs'))
    mins = m.group('mins')
    if mins:
        duration += 60 * int(mins)
    hours = m.group('hours')
    if hours:
        duration += 3600 * int(hours)
    return duration
1304
1305
def prepend_extension(filename, ext):
    """Insert *ext* before the final extension:
    'video.mp4' + 'temp' -> 'video.temp.mp4'."""
    name, real_ext = os.path.splitext(filename)
    return u'%s.%s%s' % (name, ext, real_ext)
1309
1310
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    if args is None:
        # Avoid a mutable default argument; equivalent to args=[].
        args = []
    try:
        # Run it once and discard the output; an OSError means the binary
        # could not be found/executed.
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
1319
1320
class PagedList(object):
    """A lazily evaluated list backed by a page-fetching function.

    pagefunc(pagenum) must return an iterable with the entries of the
    0-based page *pagenum*; every page except possibly the last is
    expected to hold exactly *pagesize* entries.
    """
    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """Return entries [start:end) as a plain list, fetching only the
        pages that overlap the requested range."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Absolute index range covered by this page: [firstid, nextfirstid)
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset inside this page where the requested slice begins
            # (non-zero only on the first interesting page).
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset inside this page just past the slice's end, or None
            # when the slice extends beyond this page.
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
1366
1367
def uppercase_escape(s):
    """Decode uppercase \\UXXXXXXXX escape sequences in *s* into the
    corresponding characters, leaving everything else untouched."""
    unicode_escape = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return unicode_escape(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
1374
# struct_pack/struct_unpack: aliases for struct.pack/unpack that accept a
# unicode format string everywhere.
try:
    # Probe whether struct accepts a unicode format string.
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # Unicode specs work natively; use the stdlib functions directly.
    struct_pack = struct.pack
    struct_unpack = struct.unpack
1391
1392
def read_batch_urls(batch_fd):
    """Read a list of URLs from the file object *batch_fd*.

    Blank lines and lines starting with '#', ';' or ']' are skipped, a
    leading UTF-8 BOM is stripped, and the file object is closed before
    returning.
    """
    def fixup(url):
        if isinstance(url, bytes):  # Python 2 str / Python 3 bytes line
            url = url.decode('utf-8', 'replace')
        # The BOM may show up either as the properly decoded U+FEFF
        # character or -- when the text was decoded with a 1-byte codec --
        # as the raw UTF-8 byte sequence.  The original code only checked
        # the latter, so a real BOM in a UTF-8 file was never stripped.
        for bom in (u'\ufeff', u'\xef\xbb\xbf'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1407
1408
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes, the form
    urllib expects for a request body."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1411
1412
def parse_xml(s):
    """Parse the XML document in the unicode string *s*, ignoring any
    DOCTYPE declaration it contains."""
    class _NoDoctypeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=_NoDoctypeBuilder())
    if sys.version_info >= (2, 7):
        return xml.etree.ElementTree.XML(s.encode('utf-8'), parser=parser)
    # Python 2.6's ElementTree.XML has no parser argument.
    return xml.etree.ElementTree.XML(s.encode('utf-8'))
1421
1422
# On Python 2 under Windows, getpass.getpass cannot handle a unicode
# prompt, so encode it with the locale's preferred encoding first.
if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        if isinstance(prompt, compat_str):
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
1430
1431
# US content ratings mapped to the numeric age limits used by extractors
# (for the age_limit field of extraction results).
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
1439
1440
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, e.g. 'cb({...});' -> '{...}'."""
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$'
    return re.sub(jsonp_wrapper, r'\1', code)
1443
1444
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    # Returns a ranking function: position in quality_ids (higher is
    # better), or -1 for unknown quality ids.
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1453
1454
1455 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1456
# subprocess.check_output exists since Python 2.7; provide a minimal
# fallback for 2.6.
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return its stdout; raise CalledProcessError
        on a non-zero exit status."""
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Popen gained its .args attribute only in Python 3.3, and this
            # fallback only runs on 2.6 - referencing p.args here raised
            # AttributeError instead of the intended CalledProcessError.
            # Recover the command from our own arguments instead.
            cmd = kwargs.get('args', args[0] if args else None)
            raise subprocess.CalledProcessError(ret, cmd, output=output)
        return output