2 # -*- coding: utf-8 -*-
29 import xml
.etree
.ElementTree
33 import urllib
.request
as compat_urllib_request
34 except ImportError: # Python 2
35 import urllib2
as compat_urllib_request
38 import urllib
.error
as compat_urllib_error
39 except ImportError: # Python 2
40 import urllib2
as compat_urllib_error
43 import urllib
.parse
as compat_urllib_parse
44 except ImportError: # Python 2
45 import urllib
as compat_urllib_parse
48 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
49 except ImportError: # Python 2
50 from urlparse
import urlparse
as compat_urllib_parse_urlparse
53 import urllib
.parse
as compat_urlparse
54 except ImportError: # Python 2
55 import urlparse
as compat_urlparse
58 import http
.cookiejar
as compat_cookiejar
59 except ImportError: # Python 2
60 import cookielib
as compat_cookiejar
63 import html
.entities
as compat_html_entities
64 except ImportError: # Python 2
65 import htmlentitydefs
as compat_html_entities
68 import html
.parser
as compat_html_parser
69 except ImportError: # Python 2
70 import HTMLParser
as compat_html_parser
73 import http
.client
as compat_http_client
74 except ImportError: # Python 2
75 import httplib
as compat_http_client
78 from urllib
.error
import HTTPError
as compat_HTTPError
79 except ImportError: # Python 2
80 from urllib2
import HTTPError
as compat_HTTPError
83 from urllib
.request
import urlretrieve
as compat_urlretrieve
84 except ImportError: # Python 2
85 from urllib
import urlretrieve
as compat_urlretrieve
89 from subprocess
import DEVNULL
90 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
92 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
95 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
97 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
100 res
= string
.split('%')
107 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
114 pct_sequence
+= item
[:2].decode('hex')
117 # This segment was just a single percent-encoded character.
118 # May be part of a sequence of code units, so delay decoding.
119 # (Stored in pct_sequence).
123 # Encountered non-percent-encoded characters. Flush the current
125 string
+= pct_sequence
.decode(encoding
, errors
) + rest
128 # Flush the final pct_sequence
129 string
+= pct_sequence
.decode(encoding
, errors
)
134 from urllib
.parse
import parse_qs
as compat_parse_qs
135 except ImportError: # Python 2
136 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
137 # Python 2's version is apparently totally broken
139 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
140 encoding
='utf-8', errors
='replace'):
141 qs
, _coerce_result
= qs
, unicode
142 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
144 for name_value
in pairs
:
145 if not name_value
and not strict_parsing
:
147 nv
= name_value
.split('=', 1)
150 raise ValueError("bad query field: %r" % (name_value
,))
151 # Handle case of a control-name with no equal sign
152 if keep_blank_values
:
156 if len(nv
[1]) or keep_blank_values
:
157 name
= nv
[0].replace('+', ' ')
158 name
= compat_urllib_parse_unquote(
159 name
, encoding
=encoding
, errors
=errors
)
160 name
= _coerce_result(name
)
161 value
= nv
[1].replace('+', ' ')
162 value
= compat_urllib_parse_unquote(
163 value
, encoding
=encoding
, errors
=errors
)
164 value
= _coerce_result(value
)
165 r
.append((name
, value
))
168 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
169 encoding
='utf-8', errors
='replace'):
171 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
172 encoding
=encoding
, errors
=errors
)
173 for name
, value
in pairs
:
174 if name
in parsed_result
:
175 parsed_result
[name
].append(value
)
177 parsed_result
[name
] = [value
]
181 compat_str
= unicode # Python 2
186 compat_chr
= unichr # Python 2
191 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
192 except ImportError: # Python 2.6
193 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
196 from shlex
import quote
as shlex_quote
197 except ImportError: # Python < 3.3
199 return "'" + s
.replace("'", "'\"'\"'") + "'"
203 if type(c
) is int: return c
207 if sys
.version_info
>= (3, 0):
208 compat_getenv
= os
.getenv
209 compat_expanduser
= os
.path
.expanduser
211 # Environment variables should be decoded with filesystem encoding.
212 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
214 def compat_getenv(key
, default
=None):
215 env
= os
.getenv(key
, default
)
217 env
= env
.decode(get_filesystem_encoding())
220 # HACK: The default implementations of os.path.expanduser from cpython do not decode
221 # environment variables with filesystem encoding. We will work around this by
222 # providing adjusted implementations.
223 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
224 # for different platforms with correct environment variables decoding.
226 if os
.name
== 'posix':
227 def compat_expanduser(path
):
228 """Expand ~ and ~user constructions. If user or $HOME is unknown,
230 if not path
.startswith('~'):
232 i
= path
.find('/', 1)
236 if 'HOME' not in os
.environ
:
238 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
240 userhome
= compat_getenv('HOME')
244 pwent
= pwd
.getpwnam(path
[1:i
])
247 userhome
= pwent
.pw_dir
248 userhome
= userhome
.rstrip('/')
249 return (userhome
+ path
[i
:]) or '/'
250 elif os
.name
== 'nt' or os
.name
== 'ce':
251 def compat_expanduser(path
):
252 """Expand ~ and ~user constructs.
254 If user or $HOME is unknown, do nothing."""
258 while i
< n
and path
[i
] not in '/\\':
261 if 'HOME' in os
.environ
:
262 userhome
= compat_getenv('HOME')
263 elif 'USERPROFILE' in os
.environ
:
264 userhome
= compat_getenv('USERPROFILE')
265 elif not 'HOMEPATH' in os
.environ
:
269 drive
= compat_getenv('HOMEDRIVE')
272 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
275 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
277 return userhome
+ path
[i
:]
279 compat_expanduser
= os
.path
.expanduser
282 # This is not clearly defined otherwise
283 compiled_regex_type
= type(re
.compile(''))
286 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
287 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
288 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
289 'Accept-Encoding': 'gzip, deflate',
290 'Accept-Language': 'en-us,en;q=0.5',
293 def preferredencoding():
294 """Get preferred encoding.
296 Returns the best encoding scheme for the system, based on
297 locale.getpreferredencoding() and some further tweaks.
300 pref
= locale
.getpreferredencoding()
307 if sys
.version_info
< (3,0):
309 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
312 assert type(s
) == type(u
'')
316 def write_json_file(obj
, fn
):
317 """ Encode obj as JSON and write it to fn, atomically """
321 'prefix': os
.path
.basename(fn
) + '.',
322 'dir': os
.path
.dirname(fn
),
326 # In Python 2.x, json.dump expects a bytestream.
327 # In Python 3.x, it writes to a character stream
328 if sys
.version_info
< (3, 0):
336 tf
= tempfile
.NamedTemporaryFile(**args
)
341 os
.rename(tf
.name
, fn
)
350 if sys
.version_info
>= (2, 7):
351 def find_xpath_attr(node
, xpath
, key
, val
):
352 """ Find the xpath xpath[@key=val] """
353 assert re
.match(r
'^[a-zA-Z-]+$', key
)
354 assert re
.match(r
'^[a-zA-Z0-9@\s:._-]*$', val
)
355 expr
= xpath
+ u
"[@%s='%s']" % (key
, val
)
356 return node
.find(expr
)
358 def find_xpath_attr(node
, xpath
, key
, val
):
359 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
360 # .//node does not match if a node is a direct child of . !
361 if isinstance(xpath
, unicode):
362 xpath
= xpath
.encode('ascii')
364 for f
in node
.findall(xpath
):
365 if f
.attrib
.get(key
) == val
:
369 # On python2.6 the xml.etree.ElementTree.Element methods don't support
370 # the namespace parameter
371 def xpath_with_ns(path
, ns_map
):
372 components
= [c
.split(':') for c
in path
.split('/')]
376 replaced
.append(c
[0])
379 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
380 return '/'.join(replaced
)
383 def xpath_text(node
, xpath
, name
=None, fatal
=False):
384 if sys
.version_info
< (2, 7): # Crazy 2.6
385 xpath
= xpath
.encode('ascii')
390 name
= xpath
if name
is None else name
391 raise ExtractorError('Could not find XML element %s' % name
)
397 compat_html_parser
.locatestarttagend
= re
.compile(r
"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re
.VERBOSE
) # backport bugfix
398 class BaseHTMLParser(compat_html_parser
.HTMLParser
):
400 compat_html_parser
.HTMLParser
.__init
__(self
)
403 def loads(self
, html
):
408 class AttrParser(BaseHTMLParser
):
409 """Modified HTMLParser that isolates a tag with the specified attribute"""
410 def __init__(self
, attribute
, value
):
411 self
.attribute
= attribute
416 self
.watch_startpos
= False
418 BaseHTMLParser
.__init
__(self
)
420 def error(self
, message
):
421 if self
.error_count
> 10 or self
.started
:
422 raise compat_html_parser
.HTMLParseError(message
, self
.getpos())
423 self
.rawdata
= '\n'.join(self
.html
.split('\n')[self
.getpos()[0]:]) # skip one line
424 self
.error_count
+= 1
427 def handle_starttag(self
, tag
, attrs
):
430 self
.find_startpos(None)
431 if self
.attribute
in attrs
and attrs
[self
.attribute
] == self
.value
:
434 self
.watch_startpos
= True
436 if not tag
in self
.depth
: self
.depth
[tag
] = 0
439 def handle_endtag(self
, tag
):
441 if tag
in self
.depth
: self
.depth
[tag
] -= 1
442 if self
.depth
[self
.result
[0]] == 0:
444 self
.result
.append(self
.getpos())
446 def find_startpos(self
, x
):
447 """Needed to put the start position of the result (self.result[1])
448 after the opening tag with the requested id"""
449 if self
.watch_startpos
:
450 self
.watch_startpos
= False
451 self
.result
.append(self
.getpos())
452 handle_entityref
= handle_charref
= handle_data
= handle_comment
= \
453 handle_decl
= handle_pi
= unknown_decl
= find_startpos
455 def get_result(self
):
456 if self
.result
is None:
458 if len(self
.result
) != 3:
460 lines
= self
.html
.split('\n')
461 lines
= lines
[self
.result
[1][0]-1:self
.result
[2][0]]
462 lines
[0] = lines
[0][self
.result
[1][1]:]
464 lines
[-1] = lines
[-1][:self
.result
[2][1]-self
.result
[1][1]]
465 lines
[-1] = lines
[-1][:self
.result
[2][1]]
466 return '\n'.join(lines
).strip()
467 # Hack for https://github.com/rg3/youtube-dl/issues/662
468 if sys
.version_info
< (2, 7, 3):
469 AttrParser
.parse_endtag
= (lambda self
, i
:
470 i
+ len("</scr'+'ipt>")
471 if self
.rawdata
[i
:].startswith("</scr'+'ipt>")
472 else compat_html_parser
.HTMLParser
.parse_endtag(self
, i
))
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given ``id`` in the passed HTML document.

    Thin convenience wrapper that delegates to get_element_by_attribute,
    specialised to the ``id`` attribute.
    """
    return get_element_by_attribute("id", id, html)
478 def get_element_by_attribute(attribute
, value
, html
):
479 """Return the content of the tag with the specified attribute in the passed HTML document"""
480 parser
= AttrParser(attribute
, value
)
483 except compat_html_parser
.HTMLParseError
:
485 return parser
.get_result()
487 class MetaParser(BaseHTMLParser
):
489 Modified HTMLParser that isolates a meta tag with the specified name
492 def __init__(self
, name
):
493 BaseHTMLParser
.__init
__(self
)
498 def handle_starttag(self
, tag
, attrs
):
502 if attrs
.get('name') == self
.name
:
503 self
.result
= attrs
.get('content')
505 def get_result(self
):
508 def get_meta_content(name
, html
):
510 Return the content attribute from the meta tag with the given name attribute.
512 parser
= MetaParser(name
)
515 except compat_html_parser
.HTMLParseError
:
517 return parser
.get_result()
520 def clean_html(html
):
521 """Clean an HTML snippet into a readable string"""
523 html
= html
.replace('\n', ' ')
524 html
= re
.sub(r
'\s*<\s*br\s*/?\s*>\s*', '\n', html
)
525 html
= re
.sub(r
'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html
)
527 html
= re
.sub('<.*?>', '', html
)
528 # Replace html entities
529 html
= unescapeHTML(html
)
533 def sanitize_open(filename
, open_mode
):
534 """Try to open the given filename, and slightly tweak it if this fails.
536 Attempts to open the given filename. If this fails, it tries to change
537 the filename slightly, step by step, until it's either able to open it
538 or it fails and raises a final exception, like the standard open()
541 It returns the tuple (stream, definitive_file_name).
545 if sys
.platform
== 'win32':
547 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
548 return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
)
549 stream
= open(encodeFilename(filename
), open_mode
)
550 return (stream
, filename
)
551 except (IOError, OSError) as err
:
552 if err
.errno
in (errno
.EACCES
,):
555 # In case of error, try to remove win32 forbidden chars
556 alt_filename
= os
.path
.join(
557 re
.sub(u
'[/<>:"\\|\\\\?\\*]', u
'#', path_part
)
558 for path_part
in os
.path
.split(filename
)
560 if alt_filename
== filename
:
563 # An exception here should be caught in the caller
564 stream
= open(encodeFilename(filename
), open_mode
)
565 return (stream
, alt_filename
)
568 def timeconvert(timestr
):
569 """Convert RFC 2822 defined time string into system timestamp"""
571 timetuple
= email
.utils
.parsedate_tz(timestr
)
572 if timetuple
is not None:
573 timestamp
= email
.utils
.mktime_tz(timetuple
)
576 def sanitize_filename(s
, restricted
=False, is_id
=False):
577 """Sanitizes a string so it could be used as part of a filename.
578 If restricted is set, use a stricter subset of allowed characters.
579 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
581 def replace_insane(char
):
582 if char
== '?' or ord(char
) < 32 or ord(char
) == 127:
585 return '' if restricted
else '\''
587 return '_-' if restricted
else ' -'
588 elif char
in '\\/|*<>':
590 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace()):
592 if restricted
and ord(char
) > 127:
596 result
= u
''.join(map(replace_insane
, s
))
598 while '__' in result
:
599 result
= result
.replace('__', '_')
600 result
= result
.strip('_')
601 # Common case of "Foreign band name - English song title"
602 if restricted
and result
.startswith('-_'):
608 def orderedSet(iterable
):
609 """ Remove all duplicates from the input iterable """
617 def _htmlentity_transform(entity
):
618 """Transforms an HTML entity to a character."""
619 # Known non-numeric HTML entity
620 if entity
in compat_html_entities
.name2codepoint
:
621 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
623 mobj
= re
.match(r
'#(x?[0-9]+)', entity
)
625 numstr
= mobj
.group(1)
626 if numstr
.startswith(u
'x'):
628 numstr
= u
'0%s' % numstr
631 return compat_chr(int(numstr
, base
))
633 # Unknown entity in name, return its literal representation
634 return (u
'&%s;' % entity
)
640 assert type(s
) == compat_str
643 r
'&([^;]+);', lambda m
: _htmlentity_transform(m
.group(1)), s
)
646 def encodeFilename(s
, for_subprocess
=False):
648 @param s The name of the file
651 assert type(s
) == compat_str
653 # Python 3 has a Unicode API
654 if sys
.version_info
>= (3, 0):
657 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
658 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
659 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
660 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
661 if not for_subprocess
:
664 # For subprocess calls, encode with locale encoding
665 # Refer to http://stackoverflow.com/a/9951851/35070
666 encoding
= preferredencoding()
668 encoding
= sys
.getfilesystemencoding()
671 return s
.encode(encoding
, 'ignore')
def encodeArgument(s):
    """Encode a subprocess argument the same way file names are encoded.

    Accepts text, or — for legacy call sites — a byte string, which is
    decoded as ASCII before being passed on to encodeFilename.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
683 def decodeOption(optval
):
686 if isinstance(optval
, bytes):
687 optval
= optval
.decode(preferredencoding())
689 assert isinstance(optval
, compat_str
)
692 def formatSeconds(secs
):
694 return '%d:%02d:%02d' % (secs
// 3600, (secs
% 3600) // 60, secs
% 60)
696 return '%d:%02d' % (secs
// 60, secs
% 60)
701 def make_HTTPS_handler(opts_no_check_certificate
, **kwargs
):
702 if sys
.version_info
< (3, 2):
705 class HTTPSConnectionV3(httplib
.HTTPSConnection
):
706 def __init__(self
, *args
, **kwargs
):
707 httplib
.HTTPSConnection
.__init
__(self
, *args
, **kwargs
)
710 sock
= socket
.create_connection((self
.host
, self
.port
), self
.timeout
)
711 if getattr(self
, '_tunnel_host', False):
715 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_TLSv1
)
717 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_SSLv23
)
719 class HTTPSHandlerV3(compat_urllib_request
.HTTPSHandler
):
720 def https_open(self
, req
):
721 return self
.do_open(HTTPSConnectionV3
, req
)
722 return HTTPSHandlerV3(**kwargs
)
723 elif hasattr(ssl
, 'create_default_context'): # Python >= 3.4
724 context
= ssl
.create_default_context(ssl
.Purpose
.CLIENT_AUTH
)
725 context
.options
&= ~ssl
.OP_NO_SSLv3
# Allow older, not-as-secure SSLv3
726 if opts_no_check_certificate
:
727 context
.verify_mode
= ssl
.CERT_NONE
728 return compat_urllib_request
.HTTPSHandler(context
=context
, **kwargs
)
730 context
= ssl
.SSLContext(ssl
.PROTOCOL_SSLv23
)
731 context
.verify_mode
= (ssl
.CERT_NONE
732 if opts_no_check_certificate
733 else ssl
.CERT_REQUIRED
)
734 context
.set_default_verify_paths()
736 context
.load_default_certs()
737 except AttributeError:
739 return compat_urllib_request
.HTTPSHandler(context
=context
, **kwargs
)
741 class ExtractorError(Exception):
742 """Error during info extraction."""
743 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
744 """ tb, if given, is the original traceback (so that it can be printed out).
745 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
748 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
750 if video_id
is not None:
751 msg
= video_id
+ ': ' + msg
753 msg
+= u
' (caused by %r)' % cause
755 msg
= msg
+ u
'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
756 super(ExtractorError
, self
).__init
__(msg
)
759 self
.exc_info
= sys
.exc_info() # preserve original exception
761 self
.video_id
= video_id
763 def format_traceback(self
):
764 if self
.traceback
is None:
766 return u
''.join(traceback
.format_tb(self
.traceback
))
769 class RegexNotFoundError(ExtractorError
):
770 """Error when a regex didn't match"""
774 class DownloadError(Exception):
775 """Download Error exception.
777 This exception may be thrown by FileDownloader objects if they are not
778 configured to continue on errors. They will contain the appropriate
781 def __init__(self
, msg
, exc_info
=None):
782 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
783 super(DownloadError
, self
).__init
__(msg
)
784 self
.exc_info
= exc_info
787 class SameFileError(Exception):
788 """Same File exception.
790 This exception will be thrown by FileDownloader objects if they detect
791 multiple files would have to be downloaded to the same file on disk.
796 class PostProcessingError(Exception):
797 """Post Processing exception.
799 This exception may be raised by PostProcessor's .run() method to
800 indicate an error in the postprocessing task.
802 def __init__(self
, msg
):
805 class MaxDownloadsReached(Exception):
806 """ --max-downloads limit has been reached. """
810 class UnavailableVideoError(Exception):
811 """Unavailable Format exception.
813 This exception will be thrown when a video is requested
814 in a format that is not available for that video.
819 class ContentTooShortError(Exception):
820 """Content Too Short exception.
822 This exception may be raised by FileDownloader objects when a file they
823 download is too small for what the server announced first, indicating
824 the connection was probably interrupted.
830 def __init__(self
, downloaded
, expected
):
831 self
.downloaded
= downloaded
832 self
.expected
= expected
834 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
835 """Handler for HTTP requests and responses.
837 This class, when installed with an OpenerDirector, automatically adds
838 the standard headers to every HTTP request and handles gzipped and
839 deflated responses from web servers. If compression is to be avoided in
840 a particular request, the original request in the program code only has
841 to include the HTTP header "Youtubedl-No-Compression", which will be
842 removed before making the real request.
844 Part of this code was copied from:
846 http://techknack.net/python-urllib2-handlers/
848 Andrew Rowls, the author of that code, agreed to release it to the
855 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
857 return zlib
.decompress(data
)
860 def addinfourl_wrapper(stream
, headers
, url
, code
):
861 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
862 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
863 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
867 def http_request(self
, req
):
868 for h
, v
in std_headers
.items():
869 if h
not in req
.headers
:
871 if 'Youtubedl-no-compression' in req
.headers
:
872 if 'Accept-encoding' in req
.headers
:
873 del req
.headers
['Accept-encoding']
874 del req
.headers
['Youtubedl-no-compression']
875 if 'Youtubedl-user-agent' in req
.headers
:
876 if 'User-agent' in req
.headers
:
877 del req
.headers
['User-agent']
878 req
.headers
['User-agent'] = req
.headers
['Youtubedl-user-agent']
879 del req
.headers
['Youtubedl-user-agent']
881 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
882 # Python 2.6 is brain-dead when it comes to fragments
883 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
884 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
888 def http_response(self
, req
, resp
):
891 if resp
.headers
.get('Content-encoding', '') == 'gzip':
892 content
= resp
.read()
893 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
895 uncompressed
= io
.BytesIO(gz
.read())
896 except IOError as original_ioerror
:
897 # There may be junk add the end of the file
898 # See http://stackoverflow.com/q/4928560/35070 for details
899 for i
in range(1, 1024):
901 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
902 uncompressed
= io
.BytesIO(gz
.read())
907 raise original_ioerror
908 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
909 resp
.msg
= old_resp
.msg
911 if resp
.headers
.get('Content-encoding', '') == 'deflate':
912 gz
= io
.BytesIO(self
.deflate(resp
.read()))
913 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
914 resp
.msg
= old_resp
.msg
917 https_request
= http_request
918 https_response
= http_response
921 def parse_iso8601(date_str
, delimiter
='T'):
922 """ Return a UNIX timestamp from the given date """
928 r
'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
931 timezone
= datetime
.timedelta()
933 date_str
= date_str
[:-len(m
.group(0))]
934 if not m
.group('sign'):
935 timezone
= datetime
.timedelta()
937 sign
= 1 if m
.group('sign') == '+' else -1
938 timezone
= datetime
.timedelta(
939 hours
=sign
* int(m
.group('hours')),
940 minutes
=sign
* int(m
.group('minutes')))
941 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
942 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
943 return calendar
.timegm(dt
.timetuple())
946 def unified_strdate(date_str
):
947 """Return a string with the date in the format YYYYMMDD"""
954 date_str
= date_str
.replace(',', ' ')
955 # %z (UTC offset) is only supported in python>=3.2
956 date_str
= re
.sub(r
' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str
)
957 format_expressions
= [
962 '%b %dst %Y %I:%M%p',
963 '%b %dnd %Y %I:%M%p',
964 '%b %dth %Y %I:%M%p',
973 '%Y-%m-%d %H:%M:%S.%f',
976 '%Y-%m-%dT%H:%M:%SZ',
977 '%Y-%m-%dT%H:%M:%S.%fZ',
978 '%Y-%m-%dT%H:%M:%S.%f0Z',
980 '%Y-%m-%dT%H:%M:%S.%f',
983 for expression
in format_expressions
:
985 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
988 if upload_date
is None:
989 timetuple
= email
.utils
.parsedate_tz(date_str
)
991 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
994 def determine_ext(url
, default_ext
=u
'unknown_video'):
997 guess
= url
.partition(u
'?')[0].rpartition(u
'.')[2]
998 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name of the form ``<base>.<lang>.<format>``.

    The extension of *filename* (everything after the last dot) is
    dropped before the language and format suffixes are appended.
    """
    base = filename.rsplit('.', 1)[0]
    return u'.'.join([base, sub_lang, sub_format])
1006 def date_from_str(date_str
):
1008 Return a datetime object from a string in the format YYYYMMDD or
1009 (now|today)[+-][0-9](day|week|month|year)(s)?"""
1010 today
= datetime
.date
.today()
1011 if date_str
== 'now'or date_str
== 'today':
1013 match
= re
.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
)
1014 if match
is not None:
1015 sign
= match
.group('sign')
1016 time
= int(match
.group('time'))
1019 unit
= match
.group('unit')
1020 #A bad aproximation?
1024 elif unit
== 'year':
1028 delta
= datetime
.timedelta(**{unit
: time
})
1029 return today
+ delta
1030 return datetime
.datetime
.strptime(date_str
, "%Y%m%d").date()
1032 def hyphenate_date(date_str
):
1034 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1035 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
1036 if match
is not None:
1037 return '-'.join(match
.groups())
1041 class DateRange(object):
1042 """Represents a time interval between two dates"""
1043 def __init__(self
, start
=None, end
=None):
1044 """start and end must be strings in the format accepted by date"""
1045 if start
is not None:
1046 self
.start
= date_from_str(start
)
1048 self
.start
= datetime
.datetime
.min.date()
1050 self
.end
= date_from_str(end
)
1052 self
.end
= datetime
.datetime
.max.date()
1053 if self
.start
> self
.end
:
1054 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
1057 """Returns a range that only contains the given day"""
1059 def __contains__(self
, date
):
1060 """Check if the date is in the range"""
1061 if not isinstance(date
, datetime
.date
):
1062 date
= date_from_str(date
)
1063 return self
.start
<= date
<= self
.end
1065 return '%s - %s' % ( self
.start
.isoformat(), self
.end
.isoformat())
1068 def platform_name():
1069 """ Returns the platform name as a compat_str """
1070 res
= platform
.platform()
1071 if isinstance(res
, bytes):
1072 res
= res
.decode(preferredencoding())
1074 assert isinstance(res
, compat_str
)
1078 def _windows_write_string(s
, out
):
1079 """ Returns True if the string was written using special methods,
1080 False if it has yet to be written out."""
1081 # Adapted from http://stackoverflow.com/a/3259271/35070
1084 import ctypes
.wintypes
1092 fileno
= out
.fileno()
1093 except AttributeError:
1094 # If the output stream doesn't have a fileno, it's virtual
1096 if fileno
not in WIN_OUTPUT_IDS
:
1099 GetStdHandle
= ctypes
.WINFUNCTYPE(
1100 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
1101 ("GetStdHandle", ctypes
.windll
.kernel32
))
1102 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
1104 WriteConsoleW
= ctypes
.WINFUNCTYPE(
1105 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
1106 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
1107 ctypes
.wintypes
.LPVOID
)(("WriteConsoleW", ctypes
.windll
.kernel32
))
1108 written
= ctypes
.wintypes
.DWORD(0)
1110 GetFileType
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(("GetFileType", ctypes
.windll
.kernel32
))
1111 FILE_TYPE_CHAR
= 0x0002
1112 FILE_TYPE_REMOTE
= 0x8000
1113 GetConsoleMode
= ctypes
.WINFUNCTYPE(
1114 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
1115 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
1116 ("GetConsoleMode", ctypes
.windll
.kernel32
))
1117 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
1119 def not_a_console(handle
):
1120 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
1122 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
1123 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
1125 if not_a_console(h
):
1128 def next_nonbmp_pos(s
):
1130 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
1131 except StopIteration:
1135 count
= min(next_nonbmp_pos(s
), 1024)
1137 ret
= WriteConsoleW(
1138 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
1140 raise OSError('Failed to write string')
1141 if not count
: # We just wrote a non-BMP character
1142 assert written
.value
== 2
1145 assert written
.value
> 0
1146 s
= s
[written
.value
:]
1150 def write_string(s
, out
=None, encoding
=None):
1153 assert type(s
) == compat_str
1155 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
1156 if _windows_write_string(s
, out
):
1159 if ('b' in getattr(out
, 'mode', '') or
1160 sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
1161 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
1163 elif hasattr(out
, 'buffer'):
1164 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
1165 byt
= s
.encode(enc
, 'ignore')
1166 out
.buffer.write(byt
)
1172 def bytes_to_intlist(bs
):
1175 if isinstance(bs
[0], int): # Python 3
1178 return [ord(c
) for c
in bs
]
1181 def intlist_to_bytes(xs
):
1184 if isinstance(chr(0), bytes): # Python 2
1185 return ''.join([chr(x
) for x
in xs
])
1190 # Cross-platform file locking
1191 if sys
.platform
== 'win32':
1192 import ctypes
.wintypes
1195 class OVERLAPPED(ctypes
.Structure
):
1197 ('Internal', ctypes
.wintypes
.LPVOID
),
1198 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
1199 ('Offset', ctypes
.wintypes
.DWORD
),
1200 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
1201 ('hEvent', ctypes
.wintypes
.HANDLE
),
1204 kernel32
= ctypes
.windll
.kernel32
1205 LockFileEx
= kernel32
.LockFileEx
1206 LockFileEx
.argtypes
= [
1207 ctypes
.wintypes
.HANDLE
, # hFile
1208 ctypes
.wintypes
.DWORD
, # dwFlags
1209 ctypes
.wintypes
.DWORD
, # dwReserved
1210 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1211 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1212 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1214 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
1215 UnlockFileEx
= kernel32
.UnlockFileEx
1216 UnlockFileEx
.argtypes
= [
1217 ctypes
.wintypes
.HANDLE
, # hFile
1218 ctypes
.wintypes
.DWORD
, # dwReserved
1219 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1220 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1221 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1223 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
1224 whole_low
= 0xffffffff
1225 whole_high
= 0x7fffffff
1227 def _lock_file(f
, exclusive
):
1228 overlapped
= OVERLAPPED()
1229 overlapped
.Offset
= 0
1230 overlapped
.OffsetHigh
= 0
1231 overlapped
.hEvent
= 0
1232 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
1233 handle
= msvcrt
.get_osfhandle(f
.fileno())
1234 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
1235 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1236 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
1238 def _unlock_file(f
):
1239 assert f
._lock
_file
_overlapped
_p
1240 handle
= msvcrt
.get_osfhandle(f
.fileno())
1241 if not UnlockFileEx(handle
, 0,
1242 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1243 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
1248 def _lock_file(f
, exclusive
):
1249 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
1251 def _unlock_file(f
):
1252 fcntl
.flock(f
, fcntl
.LOCK_UN
)
class locked_file(object):
    """Context manager wrapping io.open() that holds an advisory file lock
    (shared for 'r', exclusive for 'a'/'w') for the duration of the block."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        # Remembered so __enter__ can pick the lock type
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Locking failed: do not leak the open file handle
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Always close, even if unlocking raised
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when the
    interpreter reports none (possible on some Python 2 platforms)."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
def shell_quote(args):
    """Quote each element of *args* for safe use on a shell command line
    and join them with spaces.  Bytes elements are decoded first."""
    encoding = get_filesystem_encoding()

    def _as_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(encoding) if isinstance(a, bytes) else a

    return u' '.join(pipes.quote(_as_text(a)) for a in args)
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
    (the first element e such that not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    sdata = compat_urllib_parse.urlencode(payload)
    return url + u'#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): return (url, data), or (url, default) when
    no smuggled fragment is present.

    Fix: the visible code never returned the decoded (url, data) pair,
    so callers always got None for smuggled URLs.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.50KiB'.

    Accepts int/float/str; returns u'N/A' for None.  NOTE: the parameter
    name shadows the builtin `bytes` — kept for interface compatibility.

    Fixes: guard None and zero (math.log(0) raises ValueError).
    """
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB',
              u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
def get_term_width():
    """Return the terminal width in columns, or None if it cannot be
    determined.  Tries the COLUMNS environment variable first, then
    falls back to `stty size`."""
    columns = compat_getenv('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        # `stty size` prints "<rows> <cols>"
        return int(out.split()[1])
    except Exception:
        # stty missing, not a tty, or unparsable output: width unknown
        return None
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name,
    or None when the name is not a valid English month name. """
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, leaving already-valid
    entity references (&amp; &lt; &gt; &apos; &quot; and numeric
    references) untouched."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        u'&amp;',
        xml_str)
def setproctitle(title):
    """Best-effort: set the process title via glibc prctl(PR_SET_NAME).

    Silently does nothing on non-glibc systems or strange libcs.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return  # Not a glibc system; skip silently
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Return *s* with the prefix *start* removed (unchanged if absent).

    Fix: the visible code fell off the end (implicitly returning None)
    when *s* did not start with *start*.
    """
    if s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Return *s* with the suffix *end* removed (unchanged if absent).

    Fix: guard the empty suffix — s[:-len('')] is s[:0], which would
    wrongly return '' for any input.
    """
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
def url_basename(url):
    """Return the final path component of *url* ('' for a trailing slash)."""
    components = compat_urlparse.urlparse(url).path.strip(u'/').split(u'/')
    return components[-1]
class HEADRequest(compat_urllib_request.Request):
    """A Request subclass that issues HTTP HEAD instead of GET."""
    def get_method(self):
        # Fix: the method body (return value) was missing, so requests
        # would have been sent with method None.
        return 'HEAD'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int, returning *default* when v is None.

    get_attr: optionally read attribute *get_attr* from v first.
    scale/invscale: result is int(v) * invscale // scale.

    Fix: the getattr() call must be guarded — as visible it ran
    unconditionally with get_attr=None, raising TypeError.
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, returning *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: strips thousands
    separators (',', '.') and '+' signs before converting.
    Returns None for None input. """
    if int_str is None:
        return None
    int_str = re.sub(r'[,\.\+]', u'', int_str)
    return int(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float (scaled by invscale/scale), or *default* for None."""
    if v is None:
        return default
    return float(v) * invscale / scale
def parse_duration(s):
    """Parse a duration string like '1:23:45', '90', '5 min 3s' or '9.5s'
    into seconds (int, or float when a fractional part is present).
    Returns None for None input or unparsable strings."""
    if s is None:
        return None

    m = re.match(
        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
    if m is None:
        return None
    res = int(m.group('secs'))
    if m.group('mins'):
        res += int(m.group('mins')) * 60
        # The regex only allows hours when minutes matched, hence the nesting
        if m.group('hours'):
            res += int(m.group('hours')) * 60 * 60
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
def prepend_extension(filename, ext):
    """Insert *ext* before the real extension: ('a.mp4', 'tmp') -> 'a.tmp.mp4'."""
    base, real_ext = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(base, ext, real_ext)
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE: args is never mutated, so the mutable default is harmless here.
    try:
        subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        # Binary not found / not executable
        return False
    return exe
class PagedList(object):
    """Abstract lazily-paged list; subclasses implement getslice(start, end)."""
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
class OnDemandPagedList(PagedList):
    """PagedList backed by a pagefunc(pagenum) callable that produces each
    page on demand; pages are fetched only as needed for the requested slice."""

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known in advance, allowing the
    page range for a slice to be computed up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Elements to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Total number of elements still wanted (None == all)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the request; truncate and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
def uppercase_escape(s):
    """Decode literal '\\Uxxxxxxxx' escape sequences embedded in *s*
    into the corresponding characters; other text passes through."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, percent-quoting needs a UTF-8 byte string; the outer
    # version check keeps the `unicode` name from being evaluated on 3.x
    if sys.version_info < (3, 0):
        if isinstance(s, unicode):
            s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Fix: the visible code never converted the _replace()d ParseResult back
    into a string — .geturl() was missing, so a namedtuple leaked out.
    """
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
# Probe whether struct accepts text format strings.  On Python 2.6 (and
# some 2.7 builds) struct requires a bytes argument, so wrap pack/unpack
# to encode text specs; elsewhere use struct.pack/unpack directly.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
def read_batch_urls(batch_fd):
    """Read a batch file object and return its list of cleaned-up URLs,
    skipping comment lines ('#', ';', ']') and blank lines.  The file
    object is closed afterwards."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a mis-decoded UTF-8 BOM that survives as these three chars
        BOM_UTF8 = u'\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return them as an ASCII bytes POST body."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
# Element.iter was added in Python 2.7; fall back to findall on <=2.6.
# (def instead of lambda assignment, per PEP 8 E731.)
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
def parse_xml(s):
    """Parse the text *s* as XML, ignoring any DOCTYPE declaration, and
    return the root Element.  On Python 2 the element texts are fixed up
    to unicode."""
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    # The parser keyword is only supported from Python 2.7 on
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
    return tree
# getpass on Windows Python 2 chokes on unicode prompts; encode them
# with the preferred encoding first.  Everywhere else use getpass as-is.
if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        if isinstance(prompt, compat_str):
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
def parse_age_limit(s):
    """Parse an age limit string like '18' or '18+' into an int; fall
    back to the US_RATINGS lookup for rating names; None for None input.

    Fix: guard None — as visible, re.match(..., None) would raise TypeError.
    """
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, returning the bare JSON payload.
    Input without a recognizable wrapper is returned unchanged."""
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$'
    return re.sub(jsonp_wrapper, r'\1', code)
def js_to_json(code):
    """Convert JavaScript object notation to valid JSON: single-quoted
    strings and bare identifier keys become double-quoted, trailing
    commas before ']' are dropped; true/false/null pass through."""
    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            return v
        if v.startswith("'"):
            v = v[1:-1]
            # Re-escape for JSON: keep \\, unescape \', escape bare "
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Remove trailing commas before a closing bracket
    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
    return res
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.
    Returns a function mapping a quality id to its index in *quality_ids*
    (higher == better), or -1 for unknown ids. """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
# subprocess.check_output exists from Python 2.7 on; provide a minimal
# polyfill (without `input` support) for older interpreters.
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.returncode
        if ret:
            raise subprocess.CalledProcessError(ret, p.args, output=output)
        return output
def limit_length(s, length):
    """ Add ellipses to overly long strings; strings of at most *length*
    characters (and None) are returned unchanged. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dotted version string into a list of int components."""
    return list(map(int, v.split('.')))
def is_outdated_version(version, limit, assume_new=True):
    """Return True when *version* compares older than *limit*.

    When the version is missing or unparsable, return `not assume_new`
    (i.e. by default treat unknown versions as up to date).
    """
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new