2 # -*- coding: utf-8 -*-
27 import urllib
.request
as compat_urllib_request
28 except ImportError: # Python 2
29 import urllib2
as compat_urllib_request
32 import urllib
.error
as compat_urllib_error
33 except ImportError: # Python 2
34 import urllib2
as compat_urllib_error
37 import urllib
.parse
as compat_urllib_parse
38 except ImportError: # Python 2
39 import urllib
as compat_urllib_parse
42 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
43 except ImportError: # Python 2
44 from urlparse
import urlparse
as compat_urllib_parse_urlparse
47 import urllib
.parse
as compat_urlparse
48 except ImportError: # Python 2
49 import urlparse
as compat_urlparse
52 import http
.cookiejar
as compat_cookiejar
53 except ImportError: # Python 2
54 import cookielib
as compat_cookiejar
57 import html
.entities
as compat_html_entities
58 except ImportError: # Python 2
59 import htmlentitydefs
as compat_html_entities
62 import html
.parser
as compat_html_parser
63 except ImportError: # Python 2
64 import HTMLParser
as compat_html_parser
67 import http
.client
as compat_http_client
68 except ImportError: # Python 2
69 import httplib
as compat_http_client
72 from urllib
.error
import HTTPError
as compat_HTTPError
73 except ImportError: # Python 2
74 from urllib2
import HTTPError
as compat_HTTPError
77 from urllib
.request
import urlretrieve
as compat_urlretrieve
78 except ImportError: # Python 2
79 from urllib
import urlretrieve
as compat_urlretrieve
83 from subprocess
import DEVNULL
84 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
86 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
89 from urllib
.parse
import parse_qs
as compat_parse_qs
90 except ImportError: # Python 2
91 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
92 # Python 2's version is apparently totally broken
93 def _unquote(string
, encoding
='utf-8', errors
='replace'):
96 res
= string
.split('%')
103 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
110 pct_sequence
+= item
[:2].decode('hex')
113 # This segment was just a single percent-encoded character.
114 # May be part of a sequence of code units, so delay decoding.
115 # (Stored in pct_sequence).
119 # Encountered non-percent-encoded characters. Flush the current
121 string
+= pct_sequence
.decode(encoding
, errors
) + rest
124 # Flush the final pct_sequence
125 string
+= pct_sequence
.decode(encoding
, errors
)
128 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
129 encoding
='utf-8', errors
='replace'):
130 qs
, _coerce_result
= qs
, unicode
131 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
133 for name_value
in pairs
:
134 if not name_value
and not strict_parsing
:
136 nv
= name_value
.split('=', 1)
139 raise ValueError("bad query field: %r" % (name_value
,))
140 # Handle case of a control-name with no equal sign
141 if keep_blank_values
:
145 if len(nv
[1]) or keep_blank_values
:
146 name
= nv
[0].replace('+', ' ')
147 name
= _unquote(name
, encoding
=encoding
, errors
=errors
)
148 name
= _coerce_result(name
)
149 value
= nv
[1].replace('+', ' ')
150 value
= _unquote(value
, encoding
=encoding
, errors
=errors
)
151 value
= _coerce_result(value
)
152 r
.append((name
, value
))
155 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
156 encoding
='utf-8', errors
='replace'):
158 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
159 encoding
=encoding
, errors
=errors
)
160 for name
, value
in pairs
:
161 if name
in parsed_result
:
162 parsed_result
[name
].append(value
)
164 parsed_result
[name
] = [value
]
168 compat_str
= unicode # Python 2
173 compat_chr
= unichr # Python 2
178 if type(c
) is int: return c
181 # This is not clearly defined otherwise
182 compiled_regex_type
= type(re
.compile(''))
185 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
186 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
187 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
188 'Accept-Encoding': 'gzip, deflate',
189 'Accept-Language': 'en-us,en;q=0.5',
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding_name = locale.getpreferredencoding()
        # Probe that the reported codec actually works before trusting it.
        u'TEST'.encode(encoding_name)
    except:
        encoding_name = 'UTF-8'
    return encoding_name
206 if sys
.version_info
< (3,0):
208 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
211 assert type(s
) == type(u
'')
214 # In Python 2.x, json.dump expects a bytestream.
215 # In Python 3.x, it writes to a character stream
216 if sys
.version_info
< (3,0):
217 def write_json_file(obj
, fn
):
218 with open(fn
, 'wb') as f
:
221 def write_json_file(obj
, fn
):
222 with open(fn
, 'w', encoding
='utf-8') as f
:
225 if sys
.version_info
>= (2,7):
226 def find_xpath_attr(node
, xpath
, key
, val
):
227 """ Find the xpath xpath[@key=val] """
228 assert re
.match(r
'^[a-zA-Z]+$', key
)
229 assert re
.match(r
'^[a-zA-Z0-9@\s:._]*$', val
)
230 expr
= xpath
+ u
"[@%s='%s']" % (key
, val
)
231 return node
.find(expr
)
233 def find_xpath_attr(node
, xpath
, key
, val
):
234 for f
in node
.findall(xpath
):
235 if f
.attrib
.get(key
) == val
:
239 # On python2.6 the xml.etree.ElementTree.Element methods don't support
240 # the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' form via ns_map.

    Needed because on Python 2.6 the xml.etree.ElementTree.Element methods
    do not support a namespaces parameter.
    """
    expanded = []
    for step in path.split('/'):
        parts = step.split(':')
        if len(parts) == 1:
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    Receives a match object (intended for use with re.sub) whose first
    group is the entity name without the surrounding '&' and ';'.
    """
    entity = matchobj.group(1)

    # Named, non-numeric HTML entity?
    named = compat_html_entities.name2codepoint
    if entity in named:
        return compat_chr(named[entity])

    # Numeric character reference, decimal or hexadecimal.
    numeric = re.match(u'(?u)#(x?\\d+)', entity)
    if numeric is None:
        # Unknown entity in name, return its literal representation
        return u'&%s;' % entity

    digits = numeric.group(1)
    if digits.startswith(u'x'):
        return compat_chr(int(u'0%s' % digits, 16))
    return compat_chr(int(digits, 10))
277 compat_html_parser
.locatestarttagend
= re
.compile(r
"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re
.VERBOSE
) # backport bugfix
278 class BaseHTMLParser(compat_html_parser
.HTMLParser
):
280 compat_html_parser
.HTMLParser
.__init
__(self
)
283 def loads(self
, html
):
288 class AttrParser(BaseHTMLParser
):
289 """Modified HTMLParser that isolates a tag with the specified attribute"""
290 def __init__(self
, attribute
, value
):
291 self
.attribute
= attribute
296 self
.watch_startpos
= False
298 BaseHTMLParser
.__init
__(self
)
300 def error(self
, message
):
301 if self
.error_count
> 10 or self
.started
:
302 raise compat_html_parser
.HTMLParseError(message
, self
.getpos())
303 self
.rawdata
= '\n'.join(self
.html
.split('\n')[self
.getpos()[0]:]) # skip one line
304 self
.error_count
+= 1
307 def handle_starttag(self
, tag
, attrs
):
310 self
.find_startpos(None)
311 if self
.attribute
in attrs
and attrs
[self
.attribute
] == self
.value
:
314 self
.watch_startpos
= True
316 if not tag
in self
.depth
: self
.depth
[tag
] = 0
319 def handle_endtag(self
, tag
):
321 if tag
in self
.depth
: self
.depth
[tag
] -= 1
322 if self
.depth
[self
.result
[0]] == 0:
324 self
.result
.append(self
.getpos())
326 def find_startpos(self
, x
):
327 """Needed to put the start position of the result (self.result[1])
328 after the opening tag with the requested id"""
329 if self
.watch_startpos
:
330 self
.watch_startpos
= False
331 self
.result
.append(self
.getpos())
332 handle_entityref
= handle_charref
= handle_data
= handle_comment
= \
333 handle_decl
= handle_pi
= unknown_decl
= find_startpos
335 def get_result(self
):
336 if self
.result
is None:
338 if len(self
.result
) != 3:
340 lines
= self
.html
.split('\n')
341 lines
= lines
[self
.result
[1][0]-1:self
.result
[2][0]]
342 lines
[0] = lines
[0][self
.result
[1][1]:]
344 lines
[-1] = lines
[-1][:self
.result
[2][1]-self
.result
[1][1]]
345 lines
[-1] = lines
[-1][:self
.result
[2][1]]
346 return '\n'.join(lines
).strip()
347 # Hack for https://github.com/rg3/youtube-dl/issues/662
348 if sys
.version_info
< (2, 7, 3):
349 AttrParser
.parse_endtag
= (lambda self
, i
:
350 i
+ len("</scr'+'ipt>")
351 if self
.rawdata
[i
:].startswith("</scr'+'ipt>")
352 else compat_html_parser
.HTMLParser
.parse_endtag(self
, i
))
354 def get_element_by_id(id, html
):
355 """Return the content of the tag with the specified ID in the passed HTML document"""
356 return get_element_by_attribute("id", id, html
)
358 def get_element_by_attribute(attribute
, value
, html
):
359 """Return the content of the tag with the specified attribute in the passed HTML document"""
360 parser
= AttrParser(attribute
, value
)
363 except compat_html_parser
.HTMLParseError
:
365 return parser
.get_result()
367 class MetaParser(BaseHTMLParser
):
369 Modified HTMLParser that isolates a meta tag with the specified name
372 def __init__(self
, name
):
373 BaseHTMLParser
.__init
__(self
)
378 def handle_starttag(self
, tag
, attrs
):
382 if attrs
.get('name') == self
.name
:
383 self
.result
= attrs
.get('content')
385 def get_result(self
):
388 def get_meta_content(name
, html
):
390 Return the content attribute from the meta tag with the given name attribute.
392 parser
= MetaParser(name
)
395 except compat_html_parser
.HTMLParseError
:
397 return parser
.get_result()
def clean_html(html):
    """Clean an HTML snippet into a readable plain-text string."""
    # Collapse real newlines, then turn <br> and </p><p> into line breaks.
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip any remaining tags, then decode HTML entities.
    html = re.sub('<.*?>', '', html)
    html = unescapeHTML(html)
    return html.strip()
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                # stdout must be in binary mode on Windows to stream media.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; re-raise them.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # BUGFIX: pass the sanitized parts as separate arguments — handing
        # os.path.join a single generator returns the generator, not a path.
        alt_filename = os.path.join(*[
            re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
            for path_part in os.path.split(filename)
        ])
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller.
        # BUGFIX: open the sanitized alt_filename; the previous code reopened
        # the unmodified filename, which had just failed.
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        # Control characters, DEL and '?' are always dropped.
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(replace_insane(c) for c in s)
    if not is_id:
        # Collapse runs of underscores and trim them from both ends.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
488 def orderedSet(iterable
):
489 """ Remove all duplicates from the input iterable """
500 assert type(s
) == type(u
'')
502 result
= re
.sub(u
'(?u)&(.+?);', htmlentity_transform
, s
)
def encodeFilename(s, for_subprocess=False):
    """Encode a text filename to the bytes form the platform expects.

    @param s The name of the file
    """
    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        if not for_subprocess:
            return s
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
        if encoding is None:
            encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
def decodeOption(optval):
    """Return a command-line option value decoded to text; None passes through."""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
def formatSeconds(secs):
    """Render a duration in seconds as 'H:MM:SS', 'M:SS' or plain seconds."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    if secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    return '%d' % secs
552 def make_HTTPS_handler(opts_no_check_certificate
, **kwargs
):
553 if sys
.version_info
< (3, 2):
556 class HTTPSConnectionV3(httplib
.HTTPSConnection
):
557 def __init__(self
, *args
, **kwargs
):
558 httplib
.HTTPSConnection
.__init
__(self
, *args
, **kwargs
)
561 sock
= socket
.create_connection((self
.host
, self
.port
), self
.timeout
)
562 if getattr(self
, '_tunnel_host', False):
566 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_SSLv3
)
568 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_SSLv23
)
570 class HTTPSHandlerV3(compat_urllib_request
.HTTPSHandler
):
571 def https_open(self
, req
):
572 return self
.do_open(HTTPSConnectionV3
, req
)
573 return HTTPSHandlerV3(**kwargs
)
575 context
= ssl
.SSLContext(ssl
.PROTOCOL_SSLv3
)
576 context
.verify_mode
= (ssl
.CERT_NONE
577 if opts_no_check_certificate
578 else ssl
.CERT_REQUIRED
)
579 context
.set_default_verify_paths()
581 context
.load_default_certs()
582 except AttributeError:
584 return compat_urllib_request
.HTTPSHandler(context
=context
, **kwargs
)
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        # Network-level failures are always "expected" (not youtube-dl bugs).
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Render the stored traceback as text, or None when absent."""
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # The message is stored as an attribute for callers to inspect.
        self.msg = msg
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both counts are in bytes.
        self.downloaded = downloaded
        self.expected = expected
674 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
675 """Handler for HTTP requests and responses.
677 This class, when installed with an OpenerDirector, automatically adds
678 the standard headers to every HTTP request and handles gzipped and
679 deflated responses from web servers. If compression is to be avoided in
680 a particular request, the original request in the program code only has
681 to include the HTTP header "Youtubedl-No-Compression", which will be
682 removed before making the real request.
684 Part of this code was copied from:
686 http://techknack.net/python-urllib2-handlers/
688 Andrew Rowls, the author of that code, agreed to release it to the
695 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
697 return zlib
.decompress(data
)
700 def addinfourl_wrapper(stream
, headers
, url
, code
):
701 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
702 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
703 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
707 def http_request(self
, req
):
708 for h
,v
in std_headers
.items():
712 if 'Youtubedl-no-compression' in req
.headers
:
713 if 'Accept-encoding' in req
.headers
:
714 del req
.headers
['Accept-encoding']
715 del req
.headers
['Youtubedl-no-compression']
716 if 'Youtubedl-user-agent' in req
.headers
:
717 if 'User-agent' in req
.headers
:
718 del req
.headers
['User-agent']
719 req
.headers
['User-agent'] = req
.headers
['Youtubedl-user-agent']
720 del req
.headers
['Youtubedl-user-agent']
723 def http_response(self
, req
, resp
):
726 if resp
.headers
.get('Content-encoding', '') == 'gzip':
727 content
= resp
.read()
728 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
730 uncompressed
= io
.BytesIO(gz
.read())
731 except IOError as original_ioerror
:
732 # There may be junk add the end of the file
733 # See http://stackoverflow.com/q/4928560/35070 for details
734 for i
in range(1, 1024):
736 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
737 uncompressed
= io
.BytesIO(gz
.read())
742 raise original_ioerror
743 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
744 resp
.msg
= old_resp
.msg
746 if resp
.headers
.get('Content-encoding', '') == 'deflate':
747 gz
= io
.BytesIO(self
.deflate(resp
.read()))
748 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
749 resp
.msg
= old_resp
.msg
752 https_request
= http_request
753 https_response
= http_response
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD, or None."""
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so strip it.
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    # Drop the query string, then take whatever follows the last dot.
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    return guess if re.match(r'^[A-Za-z0-9]+$', guess) else default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name as '<base>.<lang>.<format>'."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # timedelta only knows days/weeks: approximate months and years.
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        return today + datetime.timedelta(**{unit + 's': amount})
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Non-matching input is returned untouched.
    return '-'.join(match.groups()) if match is not None else date_str
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Unbounded ends default to the min/max representable dates.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
863 """ Returns the platform name as a compat_str """
864 res
= platform
.platform()
865 if isinstance(res
, bytes):
866 res
= res
.decode(preferredencoding())
868 assert isinstance(res
, compat_str
)
def write_string(s, out=None):
    """Write text to *out* (default sys.stderr), coping with narrow encodings."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    try:
        out.write(s)
    except UnicodeEncodeError:
        # In Windows shells, this can fail even when the codec is just charmap!?
        # See https://wiki.python.org/moin/PrintFails#Issue
        if sys.platform == 'win32' and hasattr(out, 'encoding'):
            s = s.encode(out.encoding, 'ignore').decode(out.encoding)
            out.write(s)
        else:
            raise
    out.flush()
def bytes_to_intlist(bs):
    """Turn a bytes (or Python 2 str) buffer into a list of byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a list of byte values back into a bytes object."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2: bytes is str
        return ''.join(chr(x) for x in xs)
    return bytes(xs)
def get_cachedir(params=None):
    """Return the cache directory for youtube-dl.

    Uses params['cachedir'] when given; otherwise falls back to
    $XDG_CACHE_HOME/youtube-dl (with ~/.cache as the XDG default).
    """
    # Avoid the mutable default argument ({}), which is shared across calls.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
918 # Cross-platform file locking
919 if sys
.platform
== 'win32':
920 import ctypes
.wintypes
923 class OVERLAPPED(ctypes
.Structure
):
925 ('Internal', ctypes
.wintypes
.LPVOID
),
926 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
927 ('Offset', ctypes
.wintypes
.DWORD
),
928 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
929 ('hEvent', ctypes
.wintypes
.HANDLE
),
932 kernel32
= ctypes
.windll
.kernel32
933 LockFileEx
= kernel32
.LockFileEx
934 LockFileEx
.argtypes
= [
935 ctypes
.wintypes
.HANDLE
, # hFile
936 ctypes
.wintypes
.DWORD
, # dwFlags
937 ctypes
.wintypes
.DWORD
, # dwReserved
938 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
939 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
940 ctypes
.POINTER(OVERLAPPED
) # Overlapped
942 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
943 UnlockFileEx
= kernel32
.UnlockFileEx
944 UnlockFileEx
.argtypes
= [
945 ctypes
.wintypes
.HANDLE
, # hFile
946 ctypes
.wintypes
.DWORD
, # dwReserved
947 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
948 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
949 ctypes
.POINTER(OVERLAPPED
) # Overlapped
951 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
952 whole_low
= 0xffffffff
953 whole_high
= 0x7fffffff
955 def _lock_file(f
, exclusive
):
956 overlapped
= OVERLAPPED()
957 overlapped
.Offset
= 0
958 overlapped
.OffsetHigh
= 0
959 overlapped
.hEvent
= 0
960 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
961 handle
= msvcrt
.get_osfhandle(f
.fileno())
962 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
963 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
964 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
967 assert f
._lock
_file
_overlapped
_p
968 handle
= msvcrt
.get_osfhandle(f
.fileno())
969 if not UnlockFileEx(handle
, 0,
970 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
971 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
976 def _lock_file(f
, exclusive
):
977 fcntl
.lockf(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
980 fcntl
.lockf(f
, fcntl
.LOCK_UN
)
983 class locked_file(object):
984 def __init__(self
, filename
, mode
, encoding
=None):
985 assert mode
in ['r', 'a', 'w']
986 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
990 exclusive
= self
.mode
!= 'r'
992 _lock_file(self
.f
, exclusive
)
998 def __exit__(self
, etype
, value
, traceback
):
1000 _unlock_file(self
.f
)
1007 def write(self
, *args
):
1008 return self
.f
.write(*args
)
1010 def read(self
, *args
):
1011 return self
.f
.read(*args
)
def shell_quote(args):
    """Return a single, shell-escaped command line built from *args*.

    Bytes entries (e.g. filenames from 'encodeFilename') are decoded with
    the filesystem encoding before quoting.
    """
    # 'pipes' is deprecated and removed in Python 3.13; fall back to
    # shlex.quote, which is the same function, in this file's compat style.
    try:
        from pipes import quote as _shell_escape
    except ImportError:  # Python 3.13+
        from shlex import quote as _shell_escape
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(_shell_escape(a))
    return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
    (the first element so that Not pred(e)) """
    for elem in seq:
        yield elem
        if not pred(elem):
            return
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The data rides along as a JSON payload inside the fragment.
    payload = json.dumps(data)
    fragment = compat_urllib_parse.urlencode({u'__youtubedl_smuggle': payload})
    return u'#'.join((url, fragment))
def unsmuggle_url(smug_url, default=None):
    """Reverse smuggle_url: return (url, data) or (url, default) when plain."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
def format_bytes(bytes):
    """Render a byte count with a binary suffix, e.g. 1536 -> '1.50KiB'."""
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log(0) is undefined; treat zero as plain bytes.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
    """Parse an integer from a string that may use ',' or '.' as separators."""
    return int(re.sub(r'[,\.]', u'', int_str))
def get_term_width():
    """Best-effort terminal width: $COLUMNS, then 'stty size', else None."""
    columns = os.environ.get('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except:
        # No terminal available (or stty missing): width is unknown.
        pass
    return None
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    for number, month in enumerate(ENGLISH_NAMES, start=1):
        if month == name:
            return number
    return None
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' by '&amp;' in XML, leaving valid entities alone."""
    # The lookahead skips the predefined entities and numeric references.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        u'&amp;',
        xml_str)
def setproctitle(title):
    """Set the process title via glibc prctl(PR_SET_NAME); no-op elsewhere."""
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        # Not a glibc system: silently do nothing.
        return
    buf = ctypes.create_string_buffer(len(title) + 1)
    buf.value = title.encode('utf-8')
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip the prefix *start* from *s* when present; otherwise return *s*."""
    return s[len(start):] if s.startswith(start) else s
def url_basename(url):
    """Return the final path component of *url* (query and slashes removed)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that performs a HEAD instead of a GET."""

    def get_method(self):
        return "HEAD"
def int_or_none(v, scale=1):
    """Coerce *v* to an int (floor-divided by *scale*); None passes through."""
    if v is None:
        return v
    return int(v) // scale
def parse_duration(s):
    """Parse '[H:]M:S' or '1h2m3s'-style duration text into seconds, else None."""
    if s is None:
        return None
    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
    if not m:
        return None
    total = int(m.group('secs'))
    if m.group('mins'):
        total += 60 * int(m.group('mins'))
        if m.group('hours'):
            total += 3600 * int(m.group('hours'))
    return total
def prepend_extension(filename, ext):
    """Insert *ext* before the real extension: ('a.mp4', 'tmp') -> 'a.tmp.mp4'."""
    stem, real_ext = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(stem, ext, real_ext)
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # None instead of a mutable [] default, which is shared across calls.
    if args is None:
        args = []
    try:
        subprocess.Popen([exe] + args,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
1174 class PagedList(object):
1175 def __init__(self
, pagefunc
, pagesize
):
1176 self
._pagefunc
= pagefunc
1177 self
._pagesize
= pagesize
1180 # This is only useful for tests
1181 return len(self
.getslice())
1183 def getslice(self
, start
=0, end
=None):
1185 for pagenum
in itertools
.count(start
// self
._pagesize
):
1186 firstid
= pagenum
* self
._pagesize
1187 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
1188 if start
>= nextfirstid
:
1191 page_results
= list(self
._pagefunc
(pagenum
))
1194 start
% self
._pagesize
1195 if firstid
<= start
< nextfirstid
1199 ((end
- 1) % self
._pagesize
) + 1
1200 if (end
is not None and firstid
<= end
<= nextfirstid
)
1203 if startv
!= 0 or endv
is not None:
1204 page_results
= page_results
[startv
:endv
]
1205 res
.extend(page_results
)
1207 # A little optimization - if current page is not "full", ie. does
1208 # not contain page_size videos then we can assume that this page
1209 # is the last one - there are no more ids on further pages -
1210 # i.e. no need to query again.
1211 if len(page_results
) + startv
< self
._pagesize
:
1214 # If we got the whole page, but the next page is not interesting,
1215 # break out early as well
1216 if end
== nextfirstid
:
def uppercase_escape(s):
    """Expand literal '\\UXXXXXXXX' escape sequences embedded in *s*."""
    def _expand(m):
        return compat_chr(int(m.group(1), 16))
    return re.sub(r'\\U([0-9a-fA-F]{8})', _expand, s)
1227 struct
.pack(u
'!I', 0)
1229 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1230 def struct_pack(spec
, *args
):
1231 if isinstance(spec
, compat_str
):
1232 spec
= spec
.encode('ascii')
1233 return struct
.pack(spec
, *args
)
1235 def struct_unpack(spec
, *args
):
1236 if isinstance(spec
, compat_str
):
1237 spec
= spec
.encode('ascii')
1238 return struct
.unpack(spec
, *args
)
1240 struct_pack
= struct
.pack
1241 struct_unpack
= struct
.unpack