X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/8b2307d66b10597b66d9491668d75d0e5ae5ab0c..745cf30fc72997fe3c21286d40996339496ec4a5:/youtube_dl/utils.py?ds=inline diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f620469..112279e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -7,6 +7,7 @@ import base64 import binascii import calendar import codecs +import collections import contextlib import ctypes import datetime @@ -30,6 +31,7 @@ import ssl import subprocess import sys import tempfile +import time import traceback import xml.etree.ElementTree import zlib @@ -2729,15 +2731,72 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + """ + See [1] for cookie file format. + + 1. https://curl.haxx.se/docs/http-cookies.html + """ _HTTPONLY_PREFIX = '#HttpOnly_' + _ENTRY_LEN = 7 + _HEADER = '''# Netscape HTTP Cookie File +# This file is generated by youtube-dl. Do not edit. + +''' + _CookieFileEntry = collections.namedtuple( + 'CookieFileEntry', + ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """ + Save cookies to a file. + + Most of the code is taken from CPython 3.8 and slightly adapted + to support cookie files with UTF-8 in both python 2 and 3. 
+ """ + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + # Store session cookies with `expires` set to 0 instead of an empty # string for cookie in self: if cookie.expires is None: cookie.expires = 0 - compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) + + with io.open(filename, 'w', encoding='utf-8') as f: + f.write(self._HEADER) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: + secure = 'TRUE' + else: + secure = 'FALSE' + if cookie.domain.startswith('.'): + initial_dot = 'TRUE' + else: + initial_dot = 'FALSE' + if cookie.expires is not None: + expires = compat_str(cookie.expires) + else: + expires = '' + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. 
+ name = '' + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + '\t'.join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value]) + '\n') def load(self, filename=None, ignore_discard=False, ignore_expires=False): """Load cookies from a file.""" @@ -2747,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): else: raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + def prepare_line(line): + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + # comments and empty lines are fine + if line.startswith('#') or not line.strip(): + return line + cookie_list = line.split('\t') + if len(cookie_list) != self._ENTRY_LEN: + raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list)) + cookie = self._CookieFileEntry(*cookie_list) + if cookie.expires_at and not cookie.expires_at.isdigit(): + raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + return line + cf = io.StringIO() - with open(filename) as f: + with io.open(filename, encoding='utf-8') as f: for line in f: - if line.startswith(self._HTTPONLY_PREFIX): - line = line[len(self._HTTPONLY_PREFIX):] - cf.write(compat_str(line)) + try: + cf.write(prepare_line(line)) + except compat_cookiejar.LoadError as e: + write_string( + 'WARNING: skipping cookie file entry due to %s: %r\n' + % (e, line), sys.stderr) + continue cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to @@ -2795,6 +2872,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): https_response = http_response +class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): + if sys.version_info[0] < 3: + def redirect_request(self, req, fp, code, msg, headers, newurl): + # On python 2 urlh.geturl() may sometimes return redirect URL + # as byte string instead of unicode. 
This workaround allows + # to force it always return unicode. + return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl)) + + + def extract_timezone(date_str): m = re.search( r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',