2 # -*- coding: utf-8 -*-
20 import xml
.etree
.ElementTree
24 import urllib
.request
as compat_urllib_request
25 except ImportError: # Python 2
26 import urllib2
as compat_urllib_request
29 import urllib
.error
as compat_urllib_error
30 except ImportError: # Python 2
31 import urllib2
as compat_urllib_error
34 import urllib
.parse
as compat_urllib_parse
35 except ImportError: # Python 2
36 import urllib
as compat_urllib_parse
39 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
40 except ImportError: # Python 2
41 from urlparse
import urlparse
as compat_urllib_parse_urlparse
44 import urllib
.parse
as compat_urlparse
45 except ImportError: # Python 2
46 import urlparse
as compat_urlparse
49 import http
.cookiejar
as compat_cookiejar
50 except ImportError: # Python 2
51 import cookielib
as compat_cookiejar
54 import html
.entities
as compat_html_entities
55 except ImportError: # Python 2
56 import htmlentitydefs
as compat_html_entities
59 import html
.parser
as compat_html_parser
60 except ImportError: # Python 2
61 import HTMLParser
as compat_html_parser
64 import http
.client
as compat_http_client
65 except ImportError: # Python 2
66 import httplib
as compat_http_client
69 from urllib
.error
import HTTPError
as compat_HTTPError
70 except ImportError: # Python 2
71 from urllib2
import HTTPError
as compat_HTTPError
74 from urllib
.request
import urlretrieve
as compat_urlretrieve
75 except ImportError: # Python 2
76 from urllib
import urlretrieve
as compat_urlretrieve
80 from subprocess
import DEVNULL
81 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
83 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Percent-decode *string*, decoding the resulting bytes with *encoding*."""
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of (name, value) pairs."""
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping names to lists of values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
try:
    compat_str = unicode # Python 2
except NameError:
    compat_str = str

try:
    compat_chr = unichr # Python 2
except NameError:
    compat_chr = chr

def compat_ord(c):
    """Return the integer code point of *c*, accepting both the Python 3
    bytes-iteration result (int) and the Python 2 one (1-char str)."""
    if type(c) is int: return c
    else: return ord(c)

# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
# Default HTTP headers sent with every request (imitates a desktop browser).
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Sanity-check that the reported name is a codec Python knows.
        u'TEST'.encode(pref)
    except Exception:
        # BUGFIX: was a bare `except:`; narrow it so e.g. KeyboardInterrupt
        # is not swallowed while keeping the UTF-8 fallback behavior.
        pref = 'UTF-8'

    return pref
if sys.version_info < (3, 0):
    def compat_print(s):
        """Print a unicode string, encoding for the locale first (Python 2)."""
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        """Print a text string (Python 3: print handles unicode natively)."""
        assert type(s) == type(u'')
        print(s)
# In Python 2.x, json.dump expects a bytestream.
# In Python 3.x, it writes to a character stream
if sys.version_info < (3, 0):
    def write_json_file(obj, fn):
        """Serialize *obj* as JSON to the file named *fn*."""
        with open(fn, 'wb') as f:
            json.dump(obj, f)
else:
    def write_json_file(obj, fn):
        """Serialize *obj* as JSON to the file named *fn* (UTF-8)."""
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Restrict key/val to safe characters since they are interpolated
        # into the XPath expression.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        """Fallback for Python < 2.7: scan candidates manually."""
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand `prefix:tag` steps in *path* to `{uri}tag` using *ns_map*."""
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            # No namespace prefix on this step.
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(u'(?u)#(x?\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
# Replace the stdlib's start-tag regex with the fixed (backported) version;
# the shipped one mishandles some unquoted attribute values.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that keeps a reference to the raw document (self.html)."""
    def __init__(self):
        # BUGFIX: this was misspelled `def __init(self)`, so it was never
        # invoked and `self.html` did not exist until loads() ran.
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        """Parse a complete HTML document passed as a string."""
        self.html = html
        self.feed(html)
        self.close()
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        self.result = None
        self.started = False
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        # Tolerate up to 10 parse errors before the target tag is found.
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        # NOTE(review): the single-line guard below is reconstructed — the
        # original guard line is missing from this copy; confirm upstream.
        if len(lines) == 1:
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute("id", id, html)
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    parser = AttrParser(attribute, value)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: return whatever was isolated before the parse error.
        pass
    return parser.get_result()
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag != 'meta':
            return
        attrs = dict(attrs)
        if attrs.get('name') == self.name:
            self.result = attrs.get('content')

    def get_result(self):
        return self.result
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    parser = MetaParser(name)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep whatever was found before the parse error.
        pass
    return parser.get_result()
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; re-raise at once.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        # BUGFIX: os.path.join was handed the generator object itself;
        # unpack the path parts so a real path string is produced.
        alt_filename = os.path.join(*(
            re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
            for path_part in os.path.split(filename)))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            # BUGFIX: open the sanitized alt_filename, not the original
            # filename that just failed.
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    # None when the string could not be parsed.
    return timestamp
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        # Control characters and '?' are always stripped.
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores introduced by the substitutions.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
def unescapeHTML(s):
    """Replace HTML entities in *s* by the characters they denote."""
    if s is None:
        return None
    assert type(s) == type(u'')

    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
    return result
def encodeFilename(s):
    """
    @param s The name of the file
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s
    else:
        encoding = sys.getfilesystemencoding()
        if encoding is None:
            encoding = 'utf-8'
        return s.encode(encoding, 'ignore')
def decodeOption(optval):
    """Return *optval* as text, decoding bytes with the locale encoding."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
def formatSeconds(secs):
    """Render a duration in seconds as H:MM:SS, M:SS or plain seconds."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
def make_HTTPS_handler(opts_no_check_certificate):
    """Build an HTTPS handler, optionally skipping certificate checks.

    NOTE(review): this forces SSLv3 first — long obsolete and removed from
    modern `ssl` builds; kept as-is to preserve behavior.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if self._tunnel_host:
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    # Server refused SSLv3; fall back to auto-negotiation.
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3()
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.set_default_verify_paths()

        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        return compat_urllib_request.HTTPSHandler(context=context)
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-related failures are always "expected" (not our bug).
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Render the stored traceback, or None if there is none."""
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        self.msg = msg
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        # Try raw deflate first, then zlib-wrapped data.
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        # Older addinfourl has no 'code' constructor argument.
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        for h, v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    # NOTE(review): list partially reconstructed — some entries were missing
    # from this copy; confirm against upstream.
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # BUGFIX: was a bare `except:`; only a failed parse should be
            # ignored here.
            pass
    return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive the subtitle file name: strip the extension, append lang + format."""
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # Approximate months/years as fixed numbers of days.
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)
    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end
    def __str__(self):
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
832 """ Returns the platform name as a compat_str """
833 res
= platform
.platform()
834 if isinstance(res
, bytes):
835 res
= res
.decode(preferredencoding())
837 assert isinstance(res
, compat_str
)
def write_string(s, out=None):
    """Write text *s* to *out* (default: stderr), encoding when required."""
    if out is None:
        out = sys.stderr
    assert type(s) == type(u'')

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    out.write(s)
    out.flush()
def bytes_to_intlist(bs):
    """Convert a bytes/str buffer to a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Convert a list of integer byte values back to a bytes object."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2
        return ''.join([chr(x) for x in xs])
    else:
        return bytes(xs)
def get_cachedir(params=None):
    """Return the cache directory, honouring XDG_CACHE_HOME and the
    'cachedir' entry of *params*.

    BUGFIX: the default was a mutable dict literal (`params={}`), which is
    shared across calls; use None as sentinel instead (same behavior for
    all existing callers).
    """
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
877 # Cross-platform file locking
878 if sys
.platform
== 'win32':
879 import ctypes
.wintypes
882 class OVERLAPPED(ctypes
.Structure
):
884 ('Internal', ctypes
.wintypes
.LPVOID
),
885 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
886 ('Offset', ctypes
.wintypes
.DWORD
),
887 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
888 ('hEvent', ctypes
.wintypes
.HANDLE
),
891 kernel32
= ctypes
.windll
.kernel32
892 LockFileEx
= kernel32
.LockFileEx
893 LockFileEx
.argtypes
= [
894 ctypes
.wintypes
.HANDLE
, # hFile
895 ctypes
.wintypes
.DWORD
, # dwFlags
896 ctypes
.wintypes
.DWORD
, # dwReserved
897 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
898 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
899 ctypes
.POINTER(OVERLAPPED
) # Overlapped
901 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
902 UnlockFileEx
= kernel32
.UnlockFileEx
903 UnlockFileEx
.argtypes
= [
904 ctypes
.wintypes
.HANDLE
, # hFile
905 ctypes
.wintypes
.DWORD
, # dwReserved
906 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
907 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
908 ctypes
.POINTER(OVERLAPPED
) # Overlapped
910 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
911 whole_low
= 0xffffffff
912 whole_high
= 0x7fffffff
914 def _lock_file(f
, exclusive
):
915 overlapped
= OVERLAPPED()
916 overlapped
.Offset
= 0
917 overlapped
.OffsetHigh
= 0
918 overlapped
.hEvent
= 0
919 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
920 handle
= msvcrt
.get_osfhandle(f
.fileno())
921 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
922 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
923 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
926 assert f
._lock
_file
_overlapped
_p
927 handle
= msvcrt
.get_osfhandle(f
.fileno())
928 if not UnlockFileEx(handle
, 0,
929 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
930 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
935 def _lock_file(f
, exclusive
):
936 fcntl
.lockf(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
939 fcntl
.lockf(f
, fcntl
.LOCK_UN
)
class locked_file(object):
    """File wrapper that takes an advisory lock while used as a context manager."""
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Shared lock for reading, exclusive lock otherwise.
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def shell_quote(args):
    """Quote a sequence of arguments for safe display as a shell command line."""
    quoted_args = []
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element so that Not pred(e)) """
    for e in seq:
        yield e
        if not pred(e):
            return
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Encode the payload as JSON and stash it in the URL fragment.
    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata
def unsmuggle_url(smug_url):
    """Inverse of smuggle_url: return (url, data) — data is None if absent."""
    if not '#__youtubedl_smuggle' in smug_url:
        return smug_url, None
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Render a byte count with a binary suffix (KiB, MiB, ...)."""
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)