Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
Update changelog.
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import datetime
5 import email.utils
6 import errno
7 import gzip
8 import io
9 import json
10 import locale
11 import os
12 import platform
13 import re
14 import socket
15 import sys
16 import traceback
17 import zlib
18
19 try:
20 import urllib.request as compat_urllib_request
21 except ImportError: # Python 2
22 import urllib2 as compat_urllib_request
23
24 try:
25 import urllib.error as compat_urllib_error
26 except ImportError: # Python 2
27 import urllib2 as compat_urllib_error
28
29 try:
30 import urllib.parse as compat_urllib_parse
31 except ImportError: # Python 2
32 import urllib as compat_urllib_parse
33
34 try:
35 from urllib.parse import urlparse as compat_urllib_parse_urlparse
36 except ImportError: # Python 2
37 from urlparse import urlparse as compat_urllib_parse_urlparse
38
39 try:
40 import urllib.parse as compat_urlparse
41 except ImportError: # Python 2
42 import urlparse as compat_urlparse
43
44 try:
45 import http.cookiejar as compat_cookiejar
46 except ImportError: # Python 2
47 import cookielib as compat_cookiejar
48
49 try:
50 import html.entities as compat_html_entities
51 except ImportError: # Python 2
52 import htmlentitydefs as compat_html_entities
53
54 try:
55 import html.parser as compat_html_parser
56 except ImportError: # Python 2
57 import HTMLParser as compat_html_parser
58
59 try:
60 import http.client as compat_http_client
61 except ImportError: # Python 2
62 import httplib as compat_http_client
63
64 try:
65 from urllib.error import HTTPError as compat_HTTPError
66 except ImportError: # Python 2
67 from urllib2 import HTTPError as compat_HTTPError
68
69 try:
70 from urllib.request import urlretrieve as compat_urlretrieve
71 except ImportError: # Python 2
72 from urllib import urlretrieve as compat_urlretrieve
73
74
# Use subprocess.DEVNULL when the running Python provides it; otherwise each
# call opens the platform's null device for writing and returns that handle.
try:
    from subprocess import DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        return DEVNULL
80
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Consecutive escapes are collected as bytes and decoded together with
        the given encoding/errors, so multi-byte sequences survive.
        """
        if string == '':
            return string
        chunks = string.split('%')
        if len(chunks) == 1:
            # No percent sign at all: nothing to decode.
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pending: contiguous run of percent-encoded bytes not yet decoded
        pending = b''
        string = chunks[0]
        for chunk in chunks[1:]:
            try:
                if not chunk:
                    raise ValueError
                pending += chunk[:2].decode('hex')
                tail = chunk[2:]
                if not tail:
                    # Only an escape in this chunk; it may belong to a longer
                    # byte sequence, so keep accumulating before decoding.
                    continue
            except ValueError:
                tail = '%' + chunk
            # Plain characters follow: flush the accumulated bytes first.
            string += pending.decode(encoding, errors) + tail
            pending = b''
        if pending:
            # Flush whatever is left after the last chunk.
            string += pending.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) unicode pairs."""
        _coerce_result = unicode
        fields = [piece
                  for amp_part in qs.split('&')
                  for piece in amp_part.split(';')]
        result = []
        for field in fields:
            if not field and not strict_parsing:
                continue
            parts = field.split('=', 1)
            if len(parts) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                parts.append('')
            if len(parts[1]) or keep_blank_values:
                name = _unquote(parts[0].replace('+', ' '),
                                encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = _unquote(parts[1].replace('+', ' '),
                                 encoding=encoding, errors=errors)
                value = _coerce_result(value)
                result.append((name, value))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to its list of values."""
        parsed_result = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            parsed_result.setdefault(name, []).append(value)
        return parsed_result
159
# Text type: ``unicode`` where that builtin exists (Python 2), else ``str``.
try:
    unicode
except NameError:
    compat_str = str
else:
    compat_str = unicode  # Python 2

# Code point -> character: ``unichr`` where it exists (Python 2), else ``chr``.
try:
    unichr
except NameError:
    compat_chr = chr
else:
    compat_chr = unichr  # Python 2
169
def compat_ord(c):
    """Return c unchanged if it is already an int, otherwise ord(c)."""
    return c if type(c) is int else ord(c)
173
# This is not clearly defined otherwise
# Type of a compiled regular expression, taken from a throwaway pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP headers; YoutubeDLHandler.http_request sets each of these on
# every outgoing request, replacing any same-named header already present.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
184
def preferredencoding():
    """Get preferred encoding.

    Returns the encoding reported by locale.getpreferredencoding() if it can
    actually be used to encode text, and 'UTF-8' otherwise.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here.
        u'TEST'.encode(pref)
    except Exception:
        # Any failure means the locale answer is unusable. Deliberately not a
        # bare except, so KeyboardInterrupt/SystemExit still propagate.
        pref = 'UTF-8'

    return pref
198
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded with the preferred encoding.

        Characters the encoding cannot represent are emitted as XML
        character references.
        """
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
206
# json.dump needs a byte stream on Python 2.x but a character stream on
# Python 3.x, so the file is opened differently for each.
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as json_file:
            json.dump(obj, json_file)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as json_file:
            json.dump(obj, json_file)
217
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """Return the first element matching xpath whose attribute key equals val, or None."""
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are spliced into the expression, so only accept
        # characters that cannot break out of the predicate.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        return node.find(xpath + u"[@%s='%s']" % (key, val))
231
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities are looked up in name2codepoint; numeric references are
    accepted in decimal ('#233') and hexadecimal ('#xe9', '#XE9') form.
    Anything that cannot be resolved is returned in its literal '&...;' form.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hex references may contain a-f, which a plain \d+ would not match.
    mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            base = 16
            numstr = numstr[1:]
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Code point outside the valid range: fall through and keep
            # the reference as written.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
256
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that remembers the document it was fed.

    The full source text is kept in self.html (None until loads() is called).
    """
    def __init__(self):
        # Was misspelled ``__init`` before, so it never ran as the constructor
        # and self.html stayed undefined until loads() was called.
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        """Store html on the parser, then feed it and close the parser."""
        self.html = html
        self.feed(html)
        self.close()
267
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""

    def __init__(self, attribute, value):
        # State is set up before the base initializer is invoked.
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start_pos, end_pos] once the element is fully seen.
        self.result = None
        self.started = False
        # Per-tag-name nesting counters, tracked while inside the element.
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by skipping ahead and resuming.

        Raises HTMLParseError instead once more than 10 errors were seen or
        when the wanted element has already started.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        current_line = self.getpos()[0]
        remaining_lines = self.html.split('\n')[current_line:]  # skip one line
        self.rawdata = '\n'.join(remaining_lines)
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The element ends when its own tag name is balanced again.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())

    # Every other parser event only serves to record the start position.
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the recorded positions, or None."""
        if self.result is None or len(self.result) != 3:
            return None
        start_line, start_col = self.result[1][0], self.result[1][1]
        end_line, end_col = self.result[2][0], self.result[2][1]
        lines = self.html.split('\n')[start_line - 1:end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            # Start and end share a line, and the line was already shortened
            # by start_col, so the end offset shifts by the same amount.
            lines[-1] = lines[-1][:end_col - start_col]
        lines[-1] = lines[-1][:end_col]
        return '\n'.join(lines).strip()

# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (
        lambda self, i:
            i + len("</scr'+'ipt>")
            if self.rawdata[i:].startswith("</scr'+'ipt>")
            else compat_html_parser.HTMLParser.parse_endtag(self, i))
333
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals id in the passed HTML document"""
    return get_element_by_attribute(attribute="id", value=id, html=html)
337
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    attr_parser = AttrParser(attribute, value)
    try:
        attr_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: report whatever was collected before the error.
        pass
    return attr_parser.get_result()
346
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """

    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag == 'meta':
            attr_map = dict(attrs)
            if attr_map.get('name') == self.name:
                # A later matching tag overwrites an earlier one.
                self.result = attr_map.get('content')

    def get_result(self):
        """Return the content attribute of the last matching meta tag, or None."""
        return self.result
367
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    meta_parser = MetaParser(name)
    try:
        meta_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: report whatever was found before the error.
        pass
    return meta_parser.get_result()
378
379
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Source newlines carry no meaning; <br> and </p><p> become line breaks.
    text = html.replace('\n', ' ')
    text = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop all remaining tags.
    text = re.sub('<.*?>', '', text)
    # Decode HTML entities and trim surrounding whitespace.
    return unescapeHTML(text).strip()
391
392
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails for a reason other
    than EACCES, characters that are forbidden in win32 file names are
    replaced by '#' in the last path component and the open is retried once
    with the altered name. The directory part is left untouched.

    The name '-' selects standard output (its binary buffer when available).

    It returns the tuple (stream, definitive_file_name).

    Raises IOError/OSError like the standard open() function when neither
    attempt succeeds.
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final component is rewritten: replacing separators inside
        # the directory part would point at a different location.
        head, tail = os.path.split(filename)
        alt_tail = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail)
        if alt_tail == tail:
            # Nothing to change, so the original error stands.
            raise
        alt_filename = os.path.join(head, alt_tail)
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
426
427
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
435
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        # Map one character to its replacement (possibly the empty string).
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace():
                return '_'
            if code > 127:
                return '_'
        return char

    cleaned = u''.join(replace_insane(ch) for ch in s)
    if is_id:
        # IDs are kept as they are apart from the per-character mapping.
        return cleaned

    # Collapse runs of underscores and trim them from both ends.
    while '__' in cleaned:
        cleaned = cleaned.replace('__', '_')
    cleaned = cleaned.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if not cleaned:
        cleaned = '_'
    return cleaned
467
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, in first-seen order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
475
def unescapeHTML(s):
    """
    Replace every '&...;' sequence in s via htmlentity_transform.

    @param s a string
    """
    assert type(s) == type(u'')

    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
484
def encodeFilename(s):
    """
    Return s in the form expected by the file APIs of the running Python.

    @param s The name of the file
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass u'' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Otherwise encode with the file system encoding, dropping characters
    # that cannot be represented.
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        fs_encoding = 'utf-8'
    return s.encode(fs_encoding, 'ignore')
506
def decodeOption(optval):
    """Return optval as text; bytes are decoded with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return optval

    text = optval
    if isinstance(text, bytes):
        text = text.decode(preferredencoding())

    assert isinstance(text, compat_str)
    return text
515
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The longest form whose leading unit is non-zero is used, so exactly one
    hour is '1:00:00' and exactly one minute is '1:00'.
    """
    # >= rather than >: the boundary values belong to the larger unit
    # (3600 used to come out as '60:00' and 60 as '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
523
def make_HTTPS_handler(opts):
    """Build the HTTPS handler, honouring opts.no_check_certificate where possible.

    Before Python 3.2 a default handler is returned and the option is not
    consulted.
    """
    if sys.version_info < (3,2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.set_default_verify_paths()

    if opts.no_check_certificate:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    return compat_urllib_request.HTTPSHandler(context=context)
537
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        # Errors raised while one of these is being handled count as expected.
        expected_causes = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_causes:
            expected = True
        if not expected:
            # Unexpected failures get bug-reporting instructions appended.
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
559
560
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may raise this when they are not configured to
    continue on errors. The exception carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
572
573
class SameFileError(Exception):
    """Same File exception.

    Raised by FileDownloader objects when they detect that several
    downloads would end up in the same file on disk.
    """
581
582
class PostProcessingError(Exception):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this to signal that the
    postprocessing task failed. The message is available as .msg.
    """

    def __init__(self, msg):
        self.msg = msg
591
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
595
596
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available
    for that video.
    """
604
605
class ContentTooShortError(Exception):
    """Content Too Short exception.

    FileDownloader objects may raise this when a downloaded file is smaller
    than the size the server announced first, which indicates that the
    connection was probably interrupted.
    """
    # Sizes in bytes: what was received and what was announced.
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
620
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data as a raw deflate stream, falling back to zlib framing."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, setting .code by hand where the constructor lacks it."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    def http_request(self, req):
        # Force the standard headers, replacing same-named ones already set.
        for name, value in std_headers.items():
            if name in req.headers:
                del req.headers[name]
            req.add_header(name, value)
        # Internal marker header: drop Accept-encoding, then the marker itself.
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        # Internal marker header: its value replaces the User-agent.
        if 'Youtubedl-user-agent' in req.headers:
            req.headers.pop('User-agent', None)
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        original = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            payload = resp.read()
            reader = gzip.GzipFile(fileobj=io.BytesIO(payload), mode='rb')
            try:
                decoded = io.BytesIO(reader.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes removed.
                for trim in range(1, 1024):
                    try:
                        reader = gzip.GzipFile(fileobj=io.BytesIO(payload[:-trim]), mode='rb')
                        decoded = io.BytesIO(reader.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(decoded, original.headers, original.url, original.code)
            resp.msg = original.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, original.headers, original.url, original.code)
            resp.msg = original.msg
        return resp

    https_request = http_request
    https_response = http_response
701
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    date_str is tried against a fixed list of known formats after commas
    have been replaced by spaces and a trailing UTC offset has been removed.
    Returns None when no format matches.
    """
    upload_date = None
    #Replace commas
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Format does not apply; only this error is expected here, so
            # anything else (e.g. KeyboardInterrupt) is left to propagate.
            continue
        # First matching format wins; no need to try the rest.
        break
    return upload_date
725
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url.

    The text after the last '.' of the part before the first '?' is returned
    if it is purely alphanumeric; otherwise default_ext is returned.
    """
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
732
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return stem + u'.' + sub_lang + u'.' + sub_format
735
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if date_str matches neither form.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        #A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keyword names (days, weeks).
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
761
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str;
        a missing bound defaults to the earliest/latest representable date"""
        self.start = (date_from_str(start) if start is not None
                      else datetime.datetime.min.date())
        self.end = (date_from_str(end) if end is not None
                    else datetime.datetime.max.date())
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if isinstance(date, datetime.date):
            probe = date
        else:
            # Anything that is not a date is parsed like the bounds are.
            probe = date_from_str(date)
        return self.start <= probe <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
787
788
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding.
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
797
798
def write_string(s, out=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    The text is encoded with the preferred encoding first when out is a
    binary stream or when running on Python 2.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == type(u'')

    binary_stream = 'b' in getattr(out, 'mode', '')
    # Python 2 lies about mode of sys.stderr
    if binary_stream or sys.version_info[0] < 3:
        s = s.encode(preferredencoding(), 'ignore')
    out.write(s)
    out.flush()
809
810
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints (empty list for empty input)."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(c) for c in bs]
818
819
def intlist_to_bytes(xs):
    """Return a byte string built from the list of ints xs (b'' for empty input)."""
    if not xs:
        return b''
    if not isinstance(chr(0), bytes):
        return bytes(xs)
    # Python 2
    return ''.join([chr(x) for x in xs])
827
828
def get_cachedir(params=None):
    """Return the cache directory to use.

    params -- optional mapping; its 'cachedir' entry, if present, is
              returned as is. None is treated like an empty mapping.

    Without such an entry the result is '<cache root>/youtube-dl', where the
    cache root is $XDG_CACHE_HOME if that variable is set and '~/.cache'
    (with the user directory expanded) otherwise.
    """
    # Avoid a shared mutable default argument.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))