2 # -*- coding: utf-8 -*-
25 import urllib
.request
as compat_urllib_request
26 except ImportError: # Python 2
27 import urllib2
as compat_urllib_request
30 import urllib
.error
as compat_urllib_error
31 except ImportError: # Python 2
32 import urllib2
as compat_urllib_error
35 import urllib
.parse
as compat_urllib_parse
36 except ImportError: # Python 2
37 import urllib
as compat_urllib_parse
40 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
41 except ImportError: # Python 2
42 from urlparse
import urlparse
as compat_urllib_parse_urlparse
45 import urllib
.parse
as compat_urlparse
46 except ImportError: # Python 2
47 import urlparse
as compat_urlparse
50 import http
.cookiejar
as compat_cookiejar
51 except ImportError: # Python 2
52 import cookielib
as compat_cookiejar
55 import html
.entities
as compat_html_entities
56 except ImportError: # Python 2
57 import htmlentitydefs
as compat_html_entities
60 import html
.parser
as compat_html_parser
61 except ImportError: # Python 2
62 import HTMLParser
as compat_html_parser
65 import http
.client
as compat_http_client
66 except ImportError: # Python 2
67 import httplib
as compat_http_client
70 from urllib
.error
import HTTPError
as compat_HTTPError
71 except ImportError: # Python 2
72 from urllib2
import HTTPError
as compat_HTTPError
75 from urllib
.request
import urlretrieve
as compat_urlretrieve
76 except ImportError: # Python 2
77 from urllib
import urlretrieve
as compat_urlretrieve
# On Python 3.3+ subprocess ships a DEVNULL constant; older versions must
# open os.devnull by hand.  Expose a uniform accessor either way.
try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
87 from urllib
.parse
import parse_qs
as compat_parse_qs
88 except ImportError: # Python 2
89 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
90 # Python 2's version is apparently totally broken
91 def _unquote(string
, encoding
='utf-8', errors
='replace'):
94 res
= string
.split('%')
101 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
108 pct_sequence
+= item
[:2].decode('hex')
111 # This segment was just a single percent-encoded character.
112 # May be part of a sequence of code units, so delay decoding.
113 # (Stored in pct_sequence).
117 # Encountered non-percent-encoded characters. Flush the current
119 string
+= pct_sequence
.decode(encoding
, errors
) + rest
122 # Flush the final pct_sequence
123 string
+= pct_sequence
.decode(encoding
, errors
)
126 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
127 encoding
='utf-8', errors
='replace'):
128 qs
, _coerce_result
= qs
, unicode
129 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
131 for name_value
in pairs
:
132 if not name_value
and not strict_parsing
:
134 nv
= name_value
.split('=', 1)
137 raise ValueError("bad query field: %r" % (name_value
,))
138 # Handle case of a control-name with no equal sign
139 if keep_blank_values
:
143 if len(nv
[1]) or keep_blank_values
:
144 name
= nv
[0].replace('+', ' ')
145 name
= _unquote(name
, encoding
=encoding
, errors
=errors
)
146 name
= _coerce_result(name
)
147 value
= nv
[1].replace('+', ' ')
148 value
= _unquote(value
, encoding
=encoding
, errors
=errors
)
149 value
= _coerce_result(value
)
150 r
.append((name
, value
))
153 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
154 encoding
='utf-8', errors
='replace'):
156 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
157 encoding
=encoding
, errors
=errors
)
158 for name
, value
in pairs
:
159 if name
in parsed_result
:
160 parsed_result
[name
].append(value
)
162 parsed_result
[name
] = [value
]
# Unified text/character types: `unicode`/`unichr` on Python 2,
# `str`/`chr` on Python 3.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str

try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr
176 if type(c
) is int: return c
# Type of a compiled regular expression, for isinstance() checks.
# This is not clearly defined otherwise.
compiled_regex_type = type(re.compile(''))
# Default HTTP headers added to every request; mimic a regular browser so
# servers do not serve us different content.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported encoding actually works before trusting it.
        u'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'
    return pref
204 if sys
.version_info
< (3,0):
206 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
209 assert type(s
) == type(u
'')
212 # In Python 2.x, json.dump expects a bytestream.
213 # In Python 3.x, it writes to a character stream
214 if sys
.version_info
< (3,0):
215 def write_json_file(obj
, fn
):
216 with open(fn
, 'wb') as f
:
219 def write_json_file(obj
, fn
):
220 with open(fn
, 'w', encoding
='utf-8') as f
:
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Restrict key/val so they can be embedded in the expression safely.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    # Python 2.6's ElementTree lacks attribute predicates; scan manually.
    def find_xpath_attr(node, xpath, key, val):
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' using ns_map."""
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            # No namespace prefix on this path component.
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric character reference, decimal ("#160") or hexadecimal ("#x3C").
    mobj = re.match(u'(?u)#(x?\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
# backport bugfix: install the fixed start-tag regex from newer HTMLParser
# releases (handles attribute values the old pattern choked on).
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE)
276 class BaseHTMLParser(compat_html_parser
.HTMLParser
):
278 compat_html_parser
.HTMLParser
.__init
__(self
)
281 def loads(self
, html
):
286 class AttrParser(BaseHTMLParser
):
287 """Modified HTMLParser that isolates a tag with the specified attribute"""
288 def __init__(self
, attribute
, value
):
289 self
.attribute
= attribute
294 self
.watch_startpos
= False
296 BaseHTMLParser
.__init
__(self
)
298 def error(self
, message
):
299 if self
.error_count
> 10 or self
.started
:
300 raise compat_html_parser
.HTMLParseError(message
, self
.getpos())
301 self
.rawdata
= '\n'.join(self
.html
.split('\n')[self
.getpos()[0]:]) # skip one line
302 self
.error_count
+= 1
305 def handle_starttag(self
, tag
, attrs
):
308 self
.find_startpos(None)
309 if self
.attribute
in attrs
and attrs
[self
.attribute
] == self
.value
:
312 self
.watch_startpos
= True
314 if not tag
in self
.depth
: self
.depth
[tag
] = 0
317 def handle_endtag(self
, tag
):
319 if tag
in self
.depth
: self
.depth
[tag
] -= 1
320 if self
.depth
[self
.result
[0]] == 0:
322 self
.result
.append(self
.getpos())
324 def find_startpos(self
, x
):
325 """Needed to put the start position of the result (self.result[1])
326 after the opening tag with the requested id"""
327 if self
.watch_startpos
:
328 self
.watch_startpos
= False
329 self
.result
.append(self
.getpos())
330 handle_entityref
= handle_charref
= handle_data
= handle_comment
= \
331 handle_decl
= handle_pi
= unknown_decl
= find_startpos
333 def get_result(self
):
334 if self
.result
is None:
336 if len(self
.result
) != 3:
338 lines
= self
.html
.split('\n')
339 lines
= lines
[self
.result
[1][0]-1:self
.result
[2][0]]
340 lines
[0] = lines
[0][self
.result
[1][1]:]
342 lines
[-1] = lines
[-1][:self
.result
[2][1]-self
.result
[1][1]]
343 lines
[-1] = lines
[-1][:self
.result
[2][1]]
344 return '\n'.join(lines
).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# Old HTMLParser versions mis-parse the literal "</scr'+'ipt>" token found in
# obfuscated pages; skip over it instead of treating it as an end tag.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # `id` mirrors the HTML attribute name even though it shadows the builtin.
    return get_element_by_attribute("id", id, html)
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    parser = AttrParser(attribute, value)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # The parser may raise once it has isolated the wanted tag; the
        # partial result is still usable.
        pass
    return parser.get_result()
365 class MetaParser(BaseHTMLParser
):
367 Modified HTMLParser that isolates a meta tag with the specified name
370 def __init__(self
, name
):
371 BaseHTMLParser
.__init
__(self
)
376 def handle_starttag(self
, tag
, attrs
):
380 if attrs
.get('name') == self
.name
:
381 self
.result
= attrs
.get('content')
383 def get_result(self
):
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    parser = MetaParser(name)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Partial parses are fine; the parser keeps whatever it found.
        pass
    return parser.get_result()
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
411 def sanitize_open(filename
, open_mode
):
412 """Try to open the given filename, and slightly tweak it if this fails.
414 Attempts to open the given filename. If this fails, it tries to change
415 the filename slightly, step by step, until it's either able to open it
416 or it fails and raises a final exception, like the standard open()
419 It returns the tuple (stream, definitive_file_name).
423 if sys
.platform
== 'win32':
425 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
426 return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
)
427 stream
= open(encodeFilename(filename
), open_mode
)
428 return (stream
, filename
)
429 except (IOError, OSError) as err
:
430 if err
.errno
in (errno
.EACCES
,):
433 # In case of error, try to remove win32 forbidden chars
434 alt_filename
= os
.path
.join(
435 re
.sub(u
'[/<>:"\\|\\\\?\\*]', u
'#', path_part
)
436 for path_part
in os
.path
.split(filename
)
438 if alt_filename
== filename
:
441 # An exception here should be caught in the caller
442 stream
= open(encodeFilename(filename
), open_mode
)
443 return (stream
, alt_filename
)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    # None is returned when the string could not be parsed.
    return timestamp
454 def sanitize_filename(s
, restricted
=False, is_id
=False):
455 """Sanitizes a string so it could be used as part of a filename.
456 If restricted is set, use a stricter subset of allowed characters.
457 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
459 def replace_insane(char
):
460 if char
== '?' or ord(char
) < 32 or ord(char
) == 127:
463 return '' if restricted
else '\''
465 return '_-' if restricted
else ' -'
466 elif char
in '\\/|*<>':
468 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace()):
470 if restricted
and ord(char
) > 127:
474 result
= u
''.join(map(replace_insane
, s
))
476 while '__' in result
:
477 result
= result
.replace('__', '_')
478 result
= result
.strip('_')
479 # Common case of "Foreign band name - English song title"
480 if restricted
and result
.startswith('-_'):
486 def orderedSet(iterable
):
487 """ Remove all duplicates from the input iterable """
498 assert type(s
) == type(u
'')
500 result
= re
.sub(u
'(?u)&(.+?);', htmlentity_transform
, s
)
503 def encodeFilename(s
):
505 @param s The name of the file
508 assert type(s
) == type(u
'')
510 # Python 3 has a Unicode API
511 if sys
.version_info
>= (3, 0):
514 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
515 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
516 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
517 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
520 encoding
= sys
.getfilesystemencoding()
523 return s
.encode(encoding
, 'ignore')
525 def decodeOption(optval
):
528 if isinstance(optval
, bytes):
529 optval
= optval
.decode(preferredencoding())
531 assert isinstance(optval
, compat_str
)
def formatSeconds(secs):
    """Format a duration in seconds as H:MM:SS / M:SS / plain seconds."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
542 def make_HTTPS_handler(opts_no_check_certificate
):
543 if sys
.version_info
< (3, 2):
546 class HTTPSConnectionV3(httplib
.HTTPSConnection
):
547 def __init__(self
, *args
, **kwargs
):
548 httplib
.HTTPSConnection
.__init
__(self
, *args
, **kwargs
)
551 sock
= socket
.create_connection((self
.host
, self
.port
), self
.timeout
)
552 if getattr(self
, '_tunnel_host', False):
556 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_SSLv3
)
558 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_SSLv23
)
560 class HTTPSHandlerV3(compat_urllib_request
.HTTPSHandler
):
561 def https_open(self
, req
):
562 return self
.do_open(HTTPSConnectionV3
, req
)
563 return HTTPSHandlerV3()
565 context
= ssl
.SSLContext(ssl
.PROTOCOL_SSLv3
)
566 context
.verify_mode
= (ssl
.CERT_NONE
567 if opts_no_check_certificate
568 else ssl
.CERT_REQUIRED
)
569 context
.set_default_verify_paths()
571 context
.load_default_certs()
572 except AttributeError:
574 return compat_urllib_request
.HTTPSHandler(context
=context
)
576 class ExtractorError(Exception):
577 """Error during info extraction."""
578 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None):
579 """ tb, if given, is the original traceback (so that it can be printed out).
580 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
583 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
586 msg
= msg
+ u
'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
587 super(ExtractorError
, self
).__init
__(msg
)
590 self
.exc_info
= sys
.exc_info() # preserve original exception
593 def format_traceback(self
):
594 if self
.traceback
is None:
596 return u
''.join(traceback
.format_tb(self
.traceback
))
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        # Keep the message available as an attribute as well.
        self.msg = msg
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    def __init__(self, downloaded, expected):
        # Both values are byte counts.
        self.downloaded = downloaded
        self.expected = expected
664 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
665 """Handler for HTTP requests and responses.
667 This class, when installed with an OpenerDirector, automatically adds
668 the standard headers to every HTTP request and handles gzipped and
669 deflated responses from web servers. If compression is to be avoided in
670 a particular request, the original request in the program code only has
671 to include the HTTP header "Youtubedl-No-Compression", which will be
672 removed before making the real request.
674 Part of this code was copied from:
676 http://techknack.net/python-urllib2-handlers/
678 Andrew Rowls, the author of that code, agreed to release it to the
685 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
687 return zlib
.decompress(data
)
690 def addinfourl_wrapper(stream
, headers
, url
, code
):
691 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
692 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
693 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
697 def http_request(self
, req
):
698 for h
,v
in std_headers
.items():
702 if 'Youtubedl-no-compression' in req
.headers
:
703 if 'Accept-encoding' in req
.headers
:
704 del req
.headers
['Accept-encoding']
705 del req
.headers
['Youtubedl-no-compression']
706 if 'Youtubedl-user-agent' in req
.headers
:
707 if 'User-agent' in req
.headers
:
708 del req
.headers
['User-agent']
709 req
.headers
['User-agent'] = req
.headers
['Youtubedl-user-agent']
710 del req
.headers
['Youtubedl-user-agent']
713 def http_response(self
, req
, resp
):
716 if resp
.headers
.get('Content-encoding', '') == 'gzip':
717 content
= resp
.read()
718 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
720 uncompressed
= io
.BytesIO(gz
.read())
721 except IOError as original_ioerror
:
722 # There may be junk add the end of the file
723 # See http://stackoverflow.com/q/4928560/35070 for details
724 for i
in range(1, 1024):
726 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
727 uncompressed
= io
.BytesIO(gz
.read())
732 raise original_ioerror
733 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
734 resp
.msg
= old_resp
.msg
736 if resp
.headers
.get('Content-encoding', '') == 'deflate':
737 gz
= io
.BytesIO(self
.deflate(resp
.read()))
738 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
739 resp
.msg
= old_resp
.msg
742 https_request
= http_request
743 https_response
= http_response
745 def unified_strdate(date_str
):
746 """Return a string with the date in the format YYYYMMDD"""
749 date_str
= date_str
.replace(',',' ')
750 # %z (UTC offset) is only supported in python>=3.2
751 date_str
= re
.sub(r
' (\+|-)[\d]*$', '', date_str
)
752 format_expressions
= [
760 '%Y-%m-%dT%H:%M:%SZ',
761 '%Y-%m-%dT%H:%M:%S.%fZ',
762 '%Y-%m-%dT%H:%M:%S.%f0Z',
765 for expression
in format_expressions
:
767 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
770 if upload_date
is None:
771 timetuple
= email
.utils
.parsedate_tz(date_str
)
773 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess a file extension from a URL path, falling back to default_ext."""
    # Drop the query string, then take whatever follows the last dot.
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle filename: swap the extension for '<lang>.<format>'."""
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
786 def date_from_str(date_str
):
788 Return a datetime object from a string in the format YYYYMMDD or
789 (now|today)[+-][0-9](day|week|month|year)(s)?"""
790 today
= datetime
.date
.today()
791 if date_str
== 'now'or date_str
== 'today':
793 match
= re
.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
)
794 if match
is not None:
795 sign
= match
.group('sign')
796 time
= int(match
.group('time'))
799 unit
= match
.group('unit')
808 delta
= datetime
.timedelta(**{unit
: time
})
810 return datetime
.datetime
.strptime(date_str
, "%Y%m%d").date()
812 class DateRange(object):
813 """Represents a time interval between two dates"""
814 def __init__(self
, start
=None, end
=None):
815 """start and end must be strings in the format accepted by date"""
816 if start
is not None:
817 self
.start
= date_from_str(start
)
819 self
.start
= datetime
.datetime
.min.date()
821 self
.end
= date_from_str(end
)
823 self
.end
= datetime
.datetime
.max.date()
824 if self
.start
> self
.end
:
825 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
828 """Returns a range that only contains the given day"""
830 def __contains__(self
, date
):
831 """Check if the date is in the range"""
832 if not isinstance(date
, datetime
.date
):
833 date
= date_from_str(date
)
834 return self
.start
<= date
<= self
.end
836 return '%s - %s' % ( self
.start
.isoformat(), self
.end
.isoformat())
840 """ Returns the platform name as a compat_str """
841 res
= platform
.platform()
842 if isinstance(res
, bytes):
843 res
= res
.decode(preferredencoding())
845 assert isinstance(res
, compat_str
)
849 def write_string(s
, out
=None):
852 assert type(s
) == type(u
'')
854 if ('b' in getattr(out
, 'mode', '') or
855 sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
856 s
= s
.encode(preferredencoding(), 'ignore')
def bytes_to_intlist(bs):
    """Turn a bytes (py3) / str (py2) buffer into a list of byte values."""
    if not bs:
        # Guard the bs[0] probe below against empty input.
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Turn a list of integer byte values back into bytes (str on py2)."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2
        return ''.join([chr(x) for x in xs])
    else:
        return bytes(xs)
def get_cachedir(params=None):
    """Return the cache directory: params['cachedir'] if set, otherwise
    $XDG_CACHE_HOME/youtube-dl (XDG_CACHE_HOME defaults to ~/.cache).

    The default was changed from a mutable `{}` literal to None to avoid the
    shared-mutable-default pitfall; caller-visible behavior is unchanged.
    """
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
885 # Cross-platform file locking
886 if sys
.platform
== 'win32':
887 import ctypes
.wintypes
890 class OVERLAPPED(ctypes
.Structure
):
892 ('Internal', ctypes
.wintypes
.LPVOID
),
893 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
894 ('Offset', ctypes
.wintypes
.DWORD
),
895 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
896 ('hEvent', ctypes
.wintypes
.HANDLE
),
899 kernel32
= ctypes
.windll
.kernel32
900 LockFileEx
= kernel32
.LockFileEx
901 LockFileEx
.argtypes
= [
902 ctypes
.wintypes
.HANDLE
, # hFile
903 ctypes
.wintypes
.DWORD
, # dwFlags
904 ctypes
.wintypes
.DWORD
, # dwReserved
905 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
906 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
907 ctypes
.POINTER(OVERLAPPED
) # Overlapped
909 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
910 UnlockFileEx
= kernel32
.UnlockFileEx
911 UnlockFileEx
.argtypes
= [
912 ctypes
.wintypes
.HANDLE
, # hFile
913 ctypes
.wintypes
.DWORD
, # dwReserved
914 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
915 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
916 ctypes
.POINTER(OVERLAPPED
) # Overlapped
918 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
919 whole_low
= 0xffffffff
920 whole_high
= 0x7fffffff
922 def _lock_file(f
, exclusive
):
923 overlapped
= OVERLAPPED()
924 overlapped
.Offset
= 0
925 overlapped
.OffsetHigh
= 0
926 overlapped
.hEvent
= 0
927 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
928 handle
= msvcrt
.get_osfhandle(f
.fileno())
929 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
930 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
931 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
934 assert f
._lock
_file
_overlapped
_p
935 handle
= msvcrt
.get_osfhandle(f
.fileno())
936 if not UnlockFileEx(handle
, 0,
937 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
938 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
943 def _lock_file(f
, exclusive
):
944 fcntl
.lockf(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
947 fcntl
.lockf(f
, fcntl
.LOCK_UN
)
950 class locked_file(object):
951 def __init__(self
, filename
, mode
, encoding
=None):
952 assert mode
in ['r', 'a', 'w']
953 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
957 exclusive
= self
.mode
!= 'r'
959 _lock_file(self
.f
, exclusive
)
965 def __exit__(self
, etype
, value
, traceback
):
974 def write(self
, *args
):
975 return self
.f
.write(*args
)
977 def read(self
, *args
):
978 return self
.f
.read(*args
)
981 def shell_quote(args
):
983 encoding
= sys
.getfilesystemencoding()
987 if isinstance(a
, bytes):
988 # We may get a filename encoded with 'encodeFilename'
989 a
= a
.decode(encoding
)
990 quoted_args
.append(pipes
.quote(a
))
991 return u
' '.join(quoted_args
)
994 def takewhile_inclusive(pred
, seq
):
995 """ Like itertools.takewhile, but include the latest evaluated element
996 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # The data rides along in the URL fragment, urlencoded as JSON.
    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata
def unsmuggle_url(smug_url):
    """Inverse of smuggle_url(): return (url, data); data is None when the
    URL carries no smuggled payload."""
    if not '#__youtubedl_smuggle' in smug_url:
        return smug_url, None
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Render a byte count as a human-readable string, e.g. u'1.50KiB'.

    Accepts None (-> u'N/A'), numeric strings, ints and floats.  The
    parameter name shadows the builtin but is kept for interface stability.
    """
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        # math.log(0) is undefined; zero bytes get the plain 'B' suffix.
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
    """Parse an int from a string, stripping ',' and '.' digit separators."""
    int_str = re.sub(r'[,\.]', u'', int_str)
    return int(int_str)
1039 def get_term_width():
1040 columns
= os
.environ
.get('COLUMNS', None)
1045 sp
= subprocess
.Popen(
1047 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
1048 out
, err
= sp
.communicate()
1049 return int(out
.split()[1])
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
    except ValueError:
        # Unknown month name.
        return None
def fix_xml_all_ampersand(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # A bare '&' is invalid XML; escape every occurrence so strict parsers
    # accept the document.  (The mangled source had the replacement entity
    # itself unescaped, turning this into a no-op.)
    return xml_str.replace(u'&', u'&amp;')
def setproctitle(title):
    """Set the process title via prctl(PR_SET_NAME) on Linux; no-op elsewhere."""
    assert isinstance(title, type(u''))
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        # Not Linux/glibc; silently skip.
        return
    # Size the buffer by the UTF-8 byte length, not the character count —
    # multi-byte characters would otherwise overflow the buffer.
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes) + 1)
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Return s with the prefix `start` removed (unchanged if absent)."""
    if s.startswith(start):
        return s[len(start):]
    return s
def url_basename(url):
    """Return the last path component of a URL (query/fragment excluded)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
1098 class HEADRequest(compat_urllib_request
.Request
):
1099 def get_method(self
):