import binascii
import calendar
import codecs
+import collections
import contextlib
import ctypes
import datetime
import subprocess
import sys
import tempfile
+import time
import traceback
import xml.etree.ElementTree
import zlib
compat_html_entities,
compat_html_entities_html5,
compat_http_client,
+ compat_integer_types,
compat_kwargs,
compat_os_name,
compat_parse_qs,
'%B %d %Y',
'%B %dst %Y',
'%B %dnd %Y',
+ '%B %drd %Y',
'%B %dth %Y',
'%b %d %Y',
'%b %dst %Y',
'%b %dnd %Y',
+ '%b %drd %Y',
'%b %dth %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
+ '%b %drd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
os.unlink(fn)
except OSError:
pass
+ try:
+ mask = os.umask(0)
+ os.umask(mask)
+ os.chmod(tf.name, 0o666 & ~mask)
+ except OSError:
+ pass
os.rename(tf.name, fn)
except Exception:
try:
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ """
+ See [1] for cookie file format.
+
+ 1. https://curl.haxx.se/docs/http-cookies.html
+ """
_HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by youtube-dl. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """
    Write the cookies held by this jar to a Netscape-format cookie file.

    The logic is adapted from CPython 3.8 so that cookie files containing
    UTF-8 can be written on both python 2 and 3.

    filename -- path of the file to write; falls back to self.filename.
                ValueError is raised when neither is set.
    ignore_discard -- when true, cookies flagged `discard` are written too
    ignore_expires -- when true, cookies reported as expired are written too
    """
    if filename is None:
        filename = self.filename
        if filename is None:
            raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

    # Session cookies are stored with `expires` set to 0 rather than an
    # empty string.  This pass must run before the writing pass below,
    # since is_expired() is evaluated against the rewritten value.
    for cookie in self:
        if cookie.expires is None:
            cookie.expires = 0

    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(self._HEADER)
        now = time.time()
        for cookie in self:
            if ((not ignore_discard and cookie.discard)
                    or (not ignore_expires and cookie.is_expired(now))):
                continue
            secure = 'TRUE' if cookie.secure else 'FALSE'
            initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
            expires = compat_str(cookie.expires) if cookie.expires is not None else ''
            if cookie.value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', cookie.name
            else:
                name, value = cookie.name, cookie.value
            fields = [cookie.domain, initial_dot, cookie.path,
                      secure, expires, name, value]
            f.write('\t'.join(fields) + '\n')
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file."""
else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
cf = io.StringIO()
- with open(filename) as f:
+ with io.open(filename, encoding='utf-8') as f:
for line in f:
- if line.startswith(self._HTTPONLY_PREFIX):
- line = line[len(self._HTTPONLY_PREFIX):]
- cf.write(compat_str(line))
+ try:
+ cf.write(prepare_line(line))
+ except compat_cookiejar.LoadError as e:
+ write_string(
+ 'WARNING: skipping cookie file entry due to %s: %r\n'
+ % (e, line), sys.stderr)
+ continue
cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to
https_response = http_response
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that passes the redirect URL on as unicode on python 2.

    On python 3 nothing is overridden and the base handler is used as is.
    """

    if sys.version_info < (3,):
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return the redirect
            # URL as a byte string instead of unicode.  Converting newurl
            # up front forces it to always be unicode.
            unicode_url = compat_str(newurl)
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, unicode_url)
+
+
def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle file name that belongs to `filename`.

    The new extension is `<sub_lang>.<sub_format>`; the actual substitution
    is delegated to replace_extension, which also receives
    `expected_real_ext` unchanged.
    """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
def date_from_str(date_str):
def str_to_int(int_str):
    """Leniently convert `int_str` to an integer.

    Values that are already integers (compat_integer_types) are returned
    unchanged.  For compat_str values every ',', '.' and '+' is removed and
    the remainder is handed to int_or_none.  Anything else yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    # Drop thousands separators and plus signs before parsing
    stripped = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(stripped)
def float_or_none(v, scale=1, invscale=1, default=None):
class GeoUtils(object):
# Major IPv4 address blocks per country
_country_ip_map = {
- 'AD': '85.94.160.0/19',
+ 'AD': '46.172.224.0/19',
'AE': '94.200.0.0/13',
'AF': '149.54.0.0/17',
'AG': '209.59.64.0/18',
'AL': '46.99.0.0/16',
'AM': '46.70.0.0/15',
'AO': '105.168.0.0/13',
- 'AP': '159.117.192.0/21',
+ 'AP': '182.50.184.0/21',
+ 'AQ': '23.154.160.0/24',
'AR': '181.0.0.0/12',
'AS': '202.70.112.0/20',
- 'AT': '84.112.0.0/13',
+ 'AT': '77.116.0.0/14',
'AU': '1.128.0.0/11',
'AW': '181.41.0.0/18',
- 'AZ': '5.191.0.0/16',
+ 'AX': '185.217.4.0/22',
+ 'AZ': '5.197.0.0/16',
'BA': '31.176.128.0/17',
'BB': '65.48.128.0/17',
'BD': '114.130.0.0/16',
'BE': '57.0.0.0/8',
- 'BF': '129.45.128.0/17',
+ 'BF': '102.178.0.0/15',
'BG': '95.42.0.0/15',
'BH': '37.131.0.0/17',
'BI': '154.117.192.0/18',
'BJ': '137.255.0.0/16',
- 'BL': '192.131.134.0/24',
+ 'BL': '185.212.72.0/23',
'BM': '196.12.64.0/18',
'BN': '156.31.0.0/16',
'BO': '161.56.0.0/16',
'BQ': '161.0.80.0/20',
- 'BR': '152.240.0.0/12',
+ 'BR': '191.128.0.0/12',
'BS': '24.51.64.0/18',
'BT': '119.2.96.0/19',
'BW': '168.167.0.0/16',
'BZ': '179.42.192.0/18',
'CA': '99.224.0.0/11',
'CD': '41.243.0.0/16',
- 'CF': '196.32.200.0/21',
- 'CG': '197.214.128.0/17',
+ 'CF': '197.242.176.0/21',
+ 'CG': '160.113.0.0/16',
'CH': '85.0.0.0/13',
- 'CI': '154.232.0.0/14',
+ 'CI': '102.136.0.0/14',
'CK': '202.65.32.0/19',
'CL': '152.172.0.0/14',
- 'CM': '165.210.0.0/15',
+ 'CM': '102.244.0.0/14',
'CN': '36.128.0.0/10',
'CO': '181.240.0.0/12',
'CR': '201.192.0.0/12',
'CU': '152.206.0.0/15',
'CV': '165.90.96.0/19',
'CW': '190.88.128.0/17',
- 'CY': '46.198.0.0/15',
+ 'CY': '31.153.0.0/16',
'CZ': '88.100.0.0/14',
'DE': '53.0.0.0/8',
'DJ': '197.241.0.0/17',
'EU': '2.16.0.0/13',
'FI': '91.152.0.0/13',
'FJ': '144.120.0.0/16',
+ 'FK': '80.73.208.0/21',
'FM': '119.252.112.0/20',
'FO': '88.85.32.0/19',
'FR': '90.0.0.0/9',
'GE': '31.146.0.0/16',
'GF': '161.22.64.0/18',
'GG': '62.68.160.0/19',
- 'GH': '45.208.0.0/14',
- 'GI': '85.115.128.0/19',
+ 'GH': '154.160.0.0/12',
+ 'GI': '95.164.0.0/16',
'GL': '88.83.0.0/19',
'GM': '160.182.0.0/15',
'GN': '197.149.192.0/18',
'JE': '87.244.64.0/18',
'JM': '72.27.0.0/17',
'JO': '176.29.0.0/16',
- 'JP': '126.0.0.0/8',
+ 'JP': '133.0.0.0/8',
'KE': '105.48.0.0/12',
'KG': '158.181.128.0/17',
'KH': '36.37.128.0/17',
'KI': '103.25.140.0/22',
'KM': '197.255.224.0/20',
- 'KN': '198.32.32.0/19',
+ 'KN': '198.167.192.0/19',
'KP': '175.45.176.0/22',
'KR': '175.192.0.0/10',
'KW': '37.36.0.0/14',
'KZ': '2.72.0.0/13',
'LA': '115.84.64.0/18',
'LB': '178.135.0.0/16',
- 'LC': '192.147.231.0/24',
+ 'LC': '24.92.144.0/20',
'LI': '82.117.0.0/19',
'LK': '112.134.0.0/15',
- 'LR': '41.86.0.0/19',
+ 'LR': '102.183.0.0/16',
'LS': '129.232.0.0/17',
'LT': '78.56.0.0/13',
'LU': '188.42.0.0/16',
'MT': '46.11.0.0/16',
'MU': '105.16.0.0/12',
'MV': '27.114.128.0/18',
- 'MW': '105.234.0.0/16',
+ 'MW': '102.70.0.0/15',
'MX': '187.192.0.0/11',
'MY': '175.136.0.0/13',
'MZ': '197.218.0.0/15',
'PW': '202.124.224.0/20',
'PY': '181.120.0.0/14',
'QA': '37.210.0.0/15',
- 'RE': '139.26.0.0/16',
+ 'RE': '102.35.0.0/16',
'RO': '79.112.0.0/13',
- 'RS': '178.220.0.0/14',
+ 'RS': '93.86.0.0/15',
'RU': '5.136.0.0/13',
- 'RW': '105.178.0.0/15',
+ 'RW': '41.186.0.0/16',
'SA': '188.48.0.0/13',
'SB': '202.1.160.0/19',
'SC': '154.192.0.0/11',
- 'SD': '154.96.0.0/13',
+ 'SD': '102.120.0.0/13',
'SE': '78.64.0.0/12',
- 'SG': '152.56.0.0/14',
+ 'SG': '8.128.0.0/10',
'SI': '188.196.0.0/14',
'SK': '78.98.0.0/15',
- 'SL': '197.215.0.0/17',
+ 'SL': '102.143.0.0/17',
'SM': '89.186.32.0/19',
'SN': '41.82.0.0/15',
- 'SO': '197.220.64.0/19',
+ 'SO': '154.115.192.0/18',
'SR': '186.179.128.0/17',
'SS': '105.235.208.0/21',
'ST': '197.159.160.0/19',
'TV': '202.2.96.0/19',
'TW': '120.96.0.0/11',
'TZ': '156.156.0.0/14',
- 'UA': '93.72.0.0/13',
- 'UG': '154.224.0.0/13',
- 'US': '3.0.0.0/8',
+ 'UA': '37.52.0.0/14',
+ 'UG': '102.80.0.0/13',
+ 'US': '6.0.0.0/8',
'UY': '167.56.0.0/13',
- 'UZ': '82.215.64.0/18',
+ 'UZ': '84.54.64.0/18',
'VA': '212.77.0.0/19',
- 'VC': '24.92.144.0/20',
+ 'VC': '207.191.240.0/21',
'VE': '186.88.0.0/13',
- 'VG': '172.103.64.0/18',
+ 'VG': '66.81.192.0/20',
'VI': '146.226.0.0/16',
'VN': '14.160.0.0/11',
'VU': '202.80.32.0/20',
'YE': '134.35.0.0/16',
'YT': '41.242.116.0/22',
'ZA': '41.0.0.0/11',
- 'ZM': '165.56.0.0/13',
- 'ZW': '41.85.192.0/19',
+ 'ZM': '102.144.0.0/13',
+ 'ZW': '102.177.192.0/18',
}
@classmethod
obfucasted_code)
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to the string `s`.

    Every character of `s` that occurs in `alphabet` is replaced by the
    character `shift` positions further along `alphabet`, wrapping around
    at its end (a negative `shift` moves backwards).  Characters that are
    not part of `alphabet` are copied unchanged.  When `shift` is 0 the
    input is returned as is.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    # Build the substitution table once instead of scanning `alphabet`
    # twice for every input character.  setdefault() keeps the first
    # occurrence of a repeated character, which is the position
    # alphabet.index() would have reported.
    table = {}
    for pos, char in enumerate(alphabet):
        table.setdefault(char, alphabet[(pos + shift) % size])
    return ''.join(table.get(c, c) for c in s)
+
+
def rot47(s):
    """Return `s` with ROT47 applied.

    The alphabet is the 94 printable ASCII characters from '!' to '~' and
    the shift is 47, i.e. half the alphabet, so applying the function twice
    gives back the input.
    """
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
+
+
def parse_m3u8_attributes(attrib):
info = {}
for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):