#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
+import base64
+import binascii
import calendar
import codecs
import contextlib
import ctypes
import datetime
import email.utils
+import email.header
import errno
import functools
import gzip
-import itertools
import io
+import itertools
import json
import locale
import math
import operator
import os
-import pipes
import platform
+import random
import re
-import ssl
import socket
-import struct
+import ssl
import subprocess
import sys
import tempfile
import zlib
from .compat import (
+ compat_HTMLParseError,
+ compat_HTMLParser,
compat_basestring,
compat_chr,
+ compat_cookiejar,
+ compat_ctypes_WINFUNCTYPE,
+ compat_etree_fromstring,
+ compat_expanduser,
compat_html_entities,
+ compat_html_entities_html5,
compat_http_client,
+ compat_integer_types,
compat_kwargs,
+ compat_os_name,
compat_parse_qs,
- compat_socket_create_connection,
+ compat_shlex_quote,
compat_str,
+ compat_struct_pack,
+ compat_struct_unpack,
compat_urllib_error,
compat_urllib_parse,
+ compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
+ compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
- shlex_quote,
+ compat_xpath,
)
+from .socks import (
+ ProxyType,
+ sockssocket,
+)
+
+
+def register_socks_protocols():
+ """Append the socks, socks4, socks4a and socks5 schemes to compat_urlparse.uses_netloc when missing."""
+ # "Register" SOCKS protocols
+ # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+ # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+ for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if scheme not in compat_urlparse.uses_netloc:
+ compat_urlparse.uses_netloc.append(scheme)
+
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
+
+def random_user_agent():
+ """Return a Chrome-on-Windows User-Agent string whose Chrome version is picked at random from _CHROME_VERSIONS."""
+ _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
+ _CHROME_VERSIONS = (
+ '74.0.3729.129',
+ '76.0.3780.3',
+ '76.0.3780.2',
+ '74.0.3729.128',
+ '76.0.3780.1',
+ '76.0.3780.0',
+ '75.0.3770.15',
+ '74.0.3729.127',
+ '74.0.3729.126',
+ '76.0.3779.1',
+ '76.0.3779.0',
+ '75.0.3770.14',
+ '74.0.3729.125',
+ '76.0.3778.1',
+ '76.0.3778.0',
+ '75.0.3770.13',
+ '74.0.3729.124',
+ '74.0.3729.123',
+ '73.0.3683.121',
+ '76.0.3777.1',
+ '76.0.3777.0',
+ '75.0.3770.12',
+ '74.0.3729.122',
+ '76.0.3776.4',
+ '75.0.3770.11',
+ '74.0.3729.121',
+ '76.0.3776.3',
+ '76.0.3776.2',
+ '73.0.3683.120',
+ '74.0.3729.120',
+ '74.0.3729.119',
+ '74.0.3729.118',
+ '76.0.3776.1',
+ '76.0.3776.0',
+ '76.0.3775.5',
+ '75.0.3770.10',
+ '74.0.3729.117',
+ '76.0.3775.4',
+ '76.0.3775.3',
+ '74.0.3729.116',
+ '75.0.3770.9',
+ '76.0.3775.2',
+ '76.0.3775.1',
+ '76.0.3775.0',
+ '75.0.3770.8',
+ '74.0.3729.115',
+ '74.0.3729.114',
+ '76.0.3774.1',
+ '76.0.3774.0',
+ '75.0.3770.7',
+ '74.0.3729.113',
+ '74.0.3729.112',
+ '74.0.3729.111',
+ '76.0.3773.1',
+ '76.0.3773.0',
+ '75.0.3770.6',
+ '74.0.3729.110',
+ '74.0.3729.109',
+ '76.0.3772.1',
+ '76.0.3772.0',
+ '75.0.3770.5',
+ '74.0.3729.108',
+ '74.0.3729.107',
+ '76.0.3771.1',
+ '76.0.3771.0',
+ '75.0.3770.4',
+ '74.0.3729.106',
+ '74.0.3729.105',
+ '75.0.3770.3',
+ '74.0.3729.104',
+ '74.0.3729.103',
+ '74.0.3729.102',
+ '75.0.3770.2',
+ '74.0.3729.101',
+ '75.0.3770.1',
+ '75.0.3770.0',
+ '74.0.3729.100',
+ '75.0.3769.5',
+ '75.0.3769.4',
+ '74.0.3729.99',
+ '75.0.3769.3',
+ '75.0.3769.2',
+ '75.0.3768.6',
+ '74.0.3729.98',
+ '75.0.3769.1',
+ '75.0.3769.0',
+ '74.0.3729.97',
+ '73.0.3683.119',
+ '73.0.3683.118',
+ '74.0.3729.96',
+ '75.0.3768.5',
+ '75.0.3768.4',
+ '75.0.3768.3',
+ '75.0.3768.2',
+ '74.0.3729.95',
+ '74.0.3729.94',
+ '75.0.3768.1',
+ '75.0.3768.0',
+ '74.0.3729.93',
+ '74.0.3729.92',
+ '73.0.3683.117',
+ '74.0.3729.91',
+ '75.0.3766.3',
+ '74.0.3729.90',
+ '75.0.3767.2',
+ '75.0.3767.1',
+ '75.0.3767.0',
+ '74.0.3729.89',
+ '73.0.3683.116',
+ '75.0.3766.2',
+ '74.0.3729.88',
+ '75.0.3766.1',
+ '75.0.3766.0',
+ '74.0.3729.87',
+ '73.0.3683.115',
+ '74.0.3729.86',
+ '75.0.3765.1',
+ '75.0.3765.0',
+ '74.0.3729.85',
+ '73.0.3683.114',
+ '74.0.3729.84',
+ '75.0.3764.1',
+ '75.0.3764.0',
+ '74.0.3729.83',
+ '73.0.3683.113',
+ '75.0.3763.2',
+ '75.0.3761.4',
+ '74.0.3729.82',
+ '75.0.3763.1',
+ '75.0.3763.0',
+ '74.0.3729.81',
+ '73.0.3683.112',
+ '75.0.3762.1',
+ '75.0.3762.0',
+ '74.0.3729.80',
+ '75.0.3761.3',
+ '74.0.3729.79',
+ '73.0.3683.111',
+ '75.0.3761.2',
+ '74.0.3729.78',
+ '74.0.3729.77',
+ '75.0.3761.1',
+ '75.0.3761.0',
+ '73.0.3683.110',
+ '74.0.3729.76',
+ '74.0.3729.75',
+ '75.0.3760.0',
+ '74.0.3729.74',
+ '75.0.3759.8',
+ '75.0.3759.7',
+ '75.0.3759.6',
+ '74.0.3729.73',
+ '75.0.3759.5',
+ '74.0.3729.72',
+ '73.0.3683.109',
+ '75.0.3759.4',
+ '75.0.3759.3',
+ '74.0.3729.71',
+ '75.0.3759.2',
+ '74.0.3729.70',
+ '73.0.3683.108',
+ '74.0.3729.69',
+ '75.0.3759.1',
+ '75.0.3759.0',
+ '74.0.3729.68',
+ '73.0.3683.107',
+ '74.0.3729.67',
+ '75.0.3758.1',
+ '75.0.3758.0',
+ '74.0.3729.66',
+ '73.0.3683.106',
+ '74.0.3729.65',
+ '75.0.3757.1',
+ '75.0.3757.0',
+ '74.0.3729.64',
+ '73.0.3683.105',
+ '74.0.3729.63',
+ '75.0.3756.1',
+ '75.0.3756.0',
+ '74.0.3729.62',
+ '73.0.3683.104',
+ '75.0.3755.3',
+ '75.0.3755.2',
+ '73.0.3683.103',
+ '75.0.3755.1',
+ '75.0.3755.0',
+ '74.0.3729.61',
+ '73.0.3683.102',
+ '74.0.3729.60',
+ '75.0.3754.2',
+ '74.0.3729.59',
+ '75.0.3753.4',
+ '74.0.3729.58',
+ '75.0.3754.1',
+ '75.0.3754.0',
+ '74.0.3729.57',
+ '73.0.3683.101',
+ '75.0.3753.3',
+ '75.0.3752.2',
+ '75.0.3753.2',
+ '74.0.3729.56',
+ '75.0.3753.1',
+ '75.0.3753.0',
+ '74.0.3729.55',
+ '73.0.3683.100',
+ '74.0.3729.54',
+ '75.0.3752.1',
+ '75.0.3752.0',
+ '74.0.3729.53',
+ '73.0.3683.99',
+ '74.0.3729.52',
+ '75.0.3751.1',
+ '75.0.3751.0',
+ '74.0.3729.51',
+ '73.0.3683.98',
+ '74.0.3729.50',
+ '75.0.3750.0',
+ '74.0.3729.49',
+ '74.0.3729.48',
+ '74.0.3729.47',
+ '75.0.3749.3',
+ '74.0.3729.46',
+ '73.0.3683.97',
+ '75.0.3749.2',
+ '74.0.3729.45',
+ '75.0.3749.1',
+ '75.0.3749.0',
+ '74.0.3729.44',
+ '73.0.3683.96',
+ '74.0.3729.43',
+ '74.0.3729.42',
+ '75.0.3748.1',
+ '75.0.3748.0',
+ '74.0.3729.41',
+ '75.0.3747.1',
+ '73.0.3683.95',
+ '75.0.3746.4',
+ '74.0.3729.40',
+ '74.0.3729.39',
+ '75.0.3747.0',
+ '75.0.3746.3',
+ '75.0.3746.2',
+ '74.0.3729.38',
+ '75.0.3746.1',
+ '75.0.3746.0',
+ '74.0.3729.37',
+ '73.0.3683.94',
+ '75.0.3745.5',
+ '75.0.3745.4',
+ '75.0.3745.3',
+ '75.0.3745.2',
+ '74.0.3729.36',
+ '75.0.3745.1',
+ '75.0.3745.0',
+ '75.0.3744.2',
+ '74.0.3729.35',
+ '73.0.3683.93',
+ '74.0.3729.34',
+ '75.0.3744.1',
+ '75.0.3744.0',
+ '74.0.3729.33',
+ '73.0.3683.92',
+ '74.0.3729.32',
+ '74.0.3729.31',
+ '73.0.3683.91',
+ '75.0.3741.2',
+ '75.0.3740.5',
+ '74.0.3729.30',
+ '75.0.3741.1',
+ '75.0.3741.0',
+ '74.0.3729.29',
+ '75.0.3740.4',
+ '73.0.3683.90',
+ '74.0.3729.28',
+ '75.0.3740.3',
+ '73.0.3683.89',
+ '75.0.3740.2',
+ '74.0.3729.27',
+ '75.0.3740.1',
+ '75.0.3740.0',
+ '74.0.3729.26',
+ '73.0.3683.88',
+ '73.0.3683.87',
+ '74.0.3729.25',
+ '75.0.3739.1',
+ '75.0.3739.0',
+ '73.0.3683.86',
+ '74.0.3729.24',
+ '73.0.3683.85',
+ '75.0.3738.4',
+ '75.0.3738.3',
+ '75.0.3738.2',
+ '75.0.3738.1',
+ '75.0.3738.0',
+ '74.0.3729.23',
+ '73.0.3683.84',
+ '74.0.3729.22',
+ '74.0.3729.21',
+ '75.0.3737.1',
+ '75.0.3737.0',
+ '74.0.3729.20',
+ '73.0.3683.83',
+ '74.0.3729.19',
+ '75.0.3736.1',
+ '75.0.3736.0',
+ '74.0.3729.18',
+ '73.0.3683.82',
+ '74.0.3729.17',
+ '75.0.3735.1',
+ '75.0.3735.0',
+ '74.0.3729.16',
+ '73.0.3683.81',
+ '75.0.3734.1',
+ '75.0.3734.0',
+ '74.0.3729.15',
+ '73.0.3683.80',
+ '74.0.3729.14',
+ '75.0.3733.1',
+ '75.0.3733.0',
+ '75.0.3732.1',
+ '74.0.3729.13',
+ '74.0.3729.12',
+ '73.0.3683.79',
+ '74.0.3729.11',
+ '75.0.3732.0',
+ '74.0.3729.10',
+ '73.0.3683.78',
+ '74.0.3729.9',
+ '74.0.3729.8',
+ '74.0.3729.7',
+ '75.0.3731.3',
+ '75.0.3731.2',
+ '75.0.3731.0',
+ '74.0.3729.6',
+ '73.0.3683.77',
+ '73.0.3683.76',
+ '75.0.3730.5',
+ '75.0.3730.4',
+ '73.0.3683.75',
+ '74.0.3729.5',
+ '73.0.3683.74',
+ '75.0.3730.3',
+ '75.0.3730.2',
+ '74.0.3729.4',
+ '73.0.3683.73',
+ '73.0.3683.72',
+ '75.0.3730.1',
+ '75.0.3730.0',
+ '74.0.3729.3',
+ '73.0.3683.71',
+ '74.0.3729.2',
+ '73.0.3683.70',
+ '74.0.3729.1',
+ '74.0.3729.0',
+ '74.0.3726.4',
+ '73.0.3683.69',
+ '74.0.3726.3',
+ '74.0.3728.0',
+ '74.0.3726.2',
+ '73.0.3683.68',
+ '74.0.3726.1',
+ '74.0.3726.0',
+ '74.0.3725.4',
+ '73.0.3683.67',
+ '73.0.3683.66',
+ '74.0.3725.3',
+ '74.0.3725.2',
+ '74.0.3725.1',
+ '74.0.3724.8',
+ '74.0.3725.0',
+ '73.0.3683.65',
+ '74.0.3724.7',
+ '74.0.3724.6',
+ '74.0.3724.5',
+ '74.0.3724.4',
+ '74.0.3724.3',
+ '74.0.3724.2',
+ '74.0.3724.1',
+ '74.0.3724.0',
+ '73.0.3683.64',
+ '74.0.3723.1',
+ '74.0.3723.0',
+ '73.0.3683.63',
+ '74.0.3722.1',
+ '74.0.3722.0',
+ '73.0.3683.62',
+ '74.0.3718.9',
+ '74.0.3702.3',
+ '74.0.3721.3',
+ '74.0.3721.2',
+ '74.0.3721.1',
+ '74.0.3721.0',
+ '74.0.3720.6',
+ '73.0.3683.61',
+ '72.0.3626.122',
+ '73.0.3683.60',
+ '74.0.3720.5',
+ '72.0.3626.121',
+ '74.0.3718.8',
+ '74.0.3720.4',
+ '74.0.3720.3',
+ '74.0.3718.7',
+ '74.0.3720.2',
+ '74.0.3720.1',
+ '74.0.3720.0',
+ '74.0.3718.6',
+ '74.0.3719.5',
+ '73.0.3683.59',
+ '74.0.3718.5',
+ '74.0.3718.4',
+ '74.0.3719.4',
+ '74.0.3719.3',
+ '74.0.3719.2',
+ '74.0.3719.1',
+ '73.0.3683.58',
+ '74.0.3719.0',
+ '73.0.3683.57',
+ '73.0.3683.56',
+ '74.0.3718.3',
+ '73.0.3683.55',
+ '74.0.3718.2',
+ '74.0.3718.1',
+ '74.0.3718.0',
+ '73.0.3683.54',
+ '74.0.3717.2',
+ '73.0.3683.53',
+ '74.0.3717.1',
+ '74.0.3717.0',
+ '73.0.3683.52',
+ '74.0.3716.1',
+ '74.0.3716.0',
+ '73.0.3683.51',
+ '74.0.3715.1',
+ '74.0.3715.0',
+ '73.0.3683.50',
+ '74.0.3711.2',
+ '74.0.3714.2',
+ '74.0.3713.3',
+ '74.0.3714.1',
+ '74.0.3714.0',
+ '73.0.3683.49',
+ '74.0.3713.1',
+ '74.0.3713.0',
+ '72.0.3626.120',
+ '73.0.3683.48',
+ '74.0.3712.2',
+ '74.0.3712.1',
+ '74.0.3712.0',
+ '73.0.3683.47',
+ '72.0.3626.119',
+ '73.0.3683.46',
+ '74.0.3710.2',
+ '72.0.3626.118',
+ '74.0.3711.1',
+ '74.0.3711.0',
+ '73.0.3683.45',
+ '72.0.3626.117',
+ '74.0.3710.1',
+ '74.0.3710.0',
+ '73.0.3683.44',
+ '72.0.3626.116',
+ '74.0.3709.1',
+ '74.0.3709.0',
+ '74.0.3704.9',
+ '73.0.3683.43',
+ '72.0.3626.115',
+ '74.0.3704.8',
+ '74.0.3704.7',
+ '74.0.3708.0',
+ '74.0.3706.7',
+ '74.0.3704.6',
+ '73.0.3683.42',
+ '72.0.3626.114',
+ '74.0.3706.6',
+ '72.0.3626.113',
+ '74.0.3704.5',
+ '74.0.3706.5',
+ '74.0.3706.4',
+ '74.0.3706.3',
+ '74.0.3706.2',
+ '74.0.3706.1',
+ '74.0.3706.0',
+ '73.0.3683.41',
+ '72.0.3626.112',
+ '74.0.3705.1',
+ '74.0.3705.0',
+ '73.0.3683.40',
+ '72.0.3626.111',
+ '73.0.3683.39',
+ '74.0.3704.4',
+ '73.0.3683.38',
+ '74.0.3704.3',
+ '74.0.3704.2',
+ '74.0.3704.1',
+ '74.0.3704.0',
+ '73.0.3683.37',
+ '72.0.3626.110',
+ '72.0.3626.109',
+ '74.0.3703.3',
+ '74.0.3703.2',
+ '73.0.3683.36',
+ '74.0.3703.1',
+ '74.0.3703.0',
+ '73.0.3683.35',
+ '72.0.3626.108',
+ '74.0.3702.2',
+ '74.0.3699.3',
+ '74.0.3702.1',
+ '74.0.3702.0',
+ '73.0.3683.34',
+ '72.0.3626.107',
+ '73.0.3683.33',
+ '74.0.3701.1',
+ '74.0.3701.0',
+ '73.0.3683.32',
+ '73.0.3683.31',
+ '72.0.3626.105',
+ '74.0.3700.1',
+ '74.0.3700.0',
+ '73.0.3683.29',
+ '72.0.3626.103',
+ '74.0.3699.2',
+ '74.0.3699.1',
+ '74.0.3699.0',
+ '73.0.3683.28',
+ '72.0.3626.102',
+ '73.0.3683.27',
+ '73.0.3683.26',
+ '74.0.3698.0',
+ '74.0.3696.2',
+ '72.0.3626.101',
+ '73.0.3683.25',
+ '74.0.3696.1',
+ '74.0.3696.0',
+ '74.0.3694.8',
+ '72.0.3626.100',
+ '74.0.3694.7',
+ '74.0.3694.6',
+ '74.0.3694.5',
+ '74.0.3694.4',
+ '72.0.3626.99',
+ '72.0.3626.98',
+ '74.0.3694.3',
+ '73.0.3683.24',
+ '72.0.3626.97',
+ '72.0.3626.96',
+ '72.0.3626.95',
+ '73.0.3683.23',
+ '72.0.3626.94',
+ '73.0.3683.22',
+ '73.0.3683.21',
+ '72.0.3626.93',
+ '74.0.3694.2',
+ '72.0.3626.92',
+ '74.0.3694.1',
+ '74.0.3694.0',
+ '74.0.3693.6',
+ '73.0.3683.20',
+ '72.0.3626.91',
+ '74.0.3693.5',
+ '74.0.3693.4',
+ '74.0.3693.3',
+ '74.0.3693.2',
+ '73.0.3683.19',
+ '74.0.3693.1',
+ '74.0.3693.0',
+ '73.0.3683.18',
+ '72.0.3626.90',
+ '74.0.3692.1',
+ '74.0.3692.0',
+ '73.0.3683.17',
+ '72.0.3626.89',
+ '74.0.3687.3',
+ '74.0.3691.1',
+ '74.0.3691.0',
+ '73.0.3683.16',
+ '72.0.3626.88',
+ '72.0.3626.87',
+ '73.0.3683.15',
+ '74.0.3690.1',
+ '74.0.3690.0',
+ '73.0.3683.14',
+ '72.0.3626.86',
+ '73.0.3683.13',
+ '73.0.3683.12',
+ '74.0.3689.1',
+ '74.0.3689.0',
+ '73.0.3683.11',
+ '72.0.3626.85',
+ '73.0.3683.10',
+ '72.0.3626.84',
+ '73.0.3683.9',
+ '74.0.3688.1',
+ '74.0.3688.0',
+ '73.0.3683.8',
+ '72.0.3626.83',
+ '74.0.3687.2',
+ '74.0.3687.1',
+ '74.0.3687.0',
+ '73.0.3683.7',
+ '72.0.3626.82',
+ '74.0.3686.4',
+ '72.0.3626.81',
+ '74.0.3686.3',
+ '74.0.3686.2',
+ '74.0.3686.1',
+ '74.0.3686.0',
+ '73.0.3683.6',
+ '72.0.3626.80',
+ '74.0.3685.1',
+ '74.0.3685.0',
+ '73.0.3683.5',
+ '72.0.3626.79',
+ '74.0.3684.1',
+ '74.0.3684.0',
+ '73.0.3683.4',
+ '72.0.3626.78',
+ '72.0.3626.77',
+ '73.0.3683.3',
+ '73.0.3683.2',
+ '72.0.3626.76',
+ '73.0.3683.1',
+ '73.0.3683.0',
+ '72.0.3626.75',
+ '71.0.3578.141',
+ '73.0.3682.1',
+ '73.0.3682.0',
+ '72.0.3626.74',
+ '71.0.3578.140',
+ '73.0.3681.4',
+ '73.0.3681.3',
+ '73.0.3681.2',
+ '73.0.3681.1',
+ '73.0.3681.0',
+ '72.0.3626.73',
+ '71.0.3578.139',
+ '72.0.3626.72',
+ '72.0.3626.71',
+ '73.0.3680.1',
+ '73.0.3680.0',
+ '72.0.3626.70',
+ '71.0.3578.138',
+ '73.0.3678.2',
+ '73.0.3679.1',
+ '73.0.3679.0',
+ '72.0.3626.69',
+ '71.0.3578.137',
+ '73.0.3678.1',
+ '73.0.3678.0',
+ '71.0.3578.136',
+ '73.0.3677.1',
+ '73.0.3677.0',
+ '72.0.3626.68',
+ '72.0.3626.67',
+ '71.0.3578.135',
+ '73.0.3676.1',
+ '73.0.3676.0',
+ '73.0.3674.2',
+ '72.0.3626.66',
+ '71.0.3578.134',
+ '73.0.3674.1',
+ '73.0.3674.0',
+ '72.0.3626.65',
+ '71.0.3578.133',
+ '73.0.3673.2',
+ '73.0.3673.1',
+ '73.0.3673.0',
+ '72.0.3626.64',
+ '71.0.3578.132',
+ '72.0.3626.63',
+ '72.0.3626.62',
+ '72.0.3626.61',
+ '72.0.3626.60',
+ '73.0.3672.1',
+ '73.0.3672.0',
+ '72.0.3626.59',
+ '71.0.3578.131',
+ '73.0.3671.3',
+ '73.0.3671.2',
+ '73.0.3671.1',
+ '73.0.3671.0',
+ '72.0.3626.58',
+ '71.0.3578.130',
+ '73.0.3670.1',
+ '73.0.3670.0',
+ '72.0.3626.57',
+ '71.0.3578.129',
+ '73.0.3669.1',
+ '73.0.3669.0',
+ '72.0.3626.56',
+ '71.0.3578.128',
+ '73.0.3668.2',
+ '73.0.3668.1',
+ '73.0.3668.0',
+ '72.0.3626.55',
+ '71.0.3578.127',
+ '73.0.3667.2',
+ '73.0.3667.1',
+ '73.0.3667.0',
+ '72.0.3626.54',
+ '71.0.3578.126',
+ '73.0.3666.1',
+ '73.0.3666.0',
+ '72.0.3626.53',
+ '71.0.3578.125',
+ '73.0.3665.4',
+ '73.0.3665.3',
+ '72.0.3626.52',
+ '73.0.3665.2',
+ '73.0.3664.4',
+ '73.0.3665.1',
+ '73.0.3665.0',
+ '72.0.3626.51',
+ '71.0.3578.124',
+ '72.0.3626.50',
+ '73.0.3664.3',
+ '73.0.3664.2',
+ '73.0.3664.1',
+ '73.0.3664.0',
+ '73.0.3663.2',
+ '72.0.3626.49',
+ '71.0.3578.123',
+ '73.0.3663.1',
+ '73.0.3663.0',
+ '72.0.3626.48',
+ '71.0.3578.122',
+ '73.0.3662.1',
+ '73.0.3662.0',
+ '72.0.3626.47',
+ '71.0.3578.121',
+ '73.0.3661.1',
+ '72.0.3626.46',
+ '73.0.3661.0',
+ '72.0.3626.45',
+ '71.0.3578.120',
+ '73.0.3660.2',
+ '73.0.3660.1',
+ '73.0.3660.0',
+ '72.0.3626.44',
+ '71.0.3578.119',
+ '73.0.3659.1',
+ '73.0.3659.0',
+ '72.0.3626.43',
+ '71.0.3578.118',
+ '73.0.3658.1',
+ '73.0.3658.0',
+ '72.0.3626.42',
+ '71.0.3578.117',
+ '73.0.3657.1',
+ '73.0.3657.0',
+ '72.0.3626.41',
+ '71.0.3578.116',
+ '73.0.3656.1',
+ '73.0.3656.0',
+ '72.0.3626.40',
+ '71.0.3578.115',
+ '73.0.3655.1',
+ '73.0.3655.0',
+ '72.0.3626.39',
+ '71.0.3578.114',
+ '73.0.3654.1',
+ '73.0.3654.0',
+ '72.0.3626.38',
+ '71.0.3578.113',
+ '73.0.3653.1',
+ '73.0.3653.0',
+ '72.0.3626.37',
+ '71.0.3578.112',
+ '73.0.3652.1',
+ '73.0.3652.0',
+ '72.0.3626.36',
+ '71.0.3578.111',
+ '73.0.3651.1',
+ '73.0.3651.0',
+ '72.0.3626.35',
+ '71.0.3578.110',
+ '73.0.3650.1',
+ '73.0.3650.0',
+ '72.0.3626.34',
+ '71.0.3578.109',
+ '73.0.3649.1',
+ '73.0.3649.0',
+ '72.0.3626.33',
+ '71.0.3578.108',
+ '73.0.3648.2',
+ '73.0.3648.1',
+ '73.0.3648.0',
+ '72.0.3626.32',
+ '71.0.3578.107',
+ '73.0.3647.2',
+ '73.0.3647.1',
+ '73.0.3647.0',
+ '72.0.3626.31',
+ '71.0.3578.106',
+ '73.0.3635.3',
+ '73.0.3646.2',
+ '73.0.3646.1',
+ '73.0.3646.0',
+ '72.0.3626.30',
+ '71.0.3578.105',
+ '72.0.3626.29',
+ '73.0.3645.2',
+ '73.0.3645.1',
+ '73.0.3645.0',
+ '72.0.3626.28',
+ '71.0.3578.104',
+ '72.0.3626.27',
+ '72.0.3626.26',
+ '72.0.3626.25',
+ '72.0.3626.24',
+ '73.0.3644.0',
+ '73.0.3643.2',
+ '72.0.3626.23',
+ '71.0.3578.103',
+ '73.0.3643.1',
+ '73.0.3643.0',
+ '72.0.3626.22',
+ '71.0.3578.102',
+ '73.0.3642.1',
+ '73.0.3642.0',
+ '72.0.3626.21',
+ '71.0.3578.101',
+ '73.0.3641.1',
+ '73.0.3641.0',
+ '72.0.3626.20',
+ '71.0.3578.100',
+ '72.0.3626.19',
+ '73.0.3640.1',
+ '73.0.3640.0',
+ '72.0.3626.18',
+ '73.0.3639.1',
+ '71.0.3578.99',
+ '73.0.3639.0',
+ '72.0.3626.17',
+ '73.0.3638.2',
+ '72.0.3626.16',
+ '73.0.3638.1',
+ '73.0.3638.0',
+ '72.0.3626.15',
+ '71.0.3578.98',
+ '73.0.3635.2',
+ '71.0.3578.97',
+ '73.0.3637.1',
+ '73.0.3637.0',
+ '72.0.3626.14',
+ '71.0.3578.96',
+ '71.0.3578.95',
+ '72.0.3626.13',
+ '71.0.3578.94',
+ '73.0.3636.2',
+ '71.0.3578.93',
+ '73.0.3636.1',
+ '73.0.3636.0',
+ '72.0.3626.12',
+ '71.0.3578.92',
+ '73.0.3635.1',
+ '73.0.3635.0',
+ '72.0.3626.11',
+ '71.0.3578.91',
+ '73.0.3634.2',
+ '73.0.3634.1',
+ '73.0.3634.0',
+ '72.0.3626.10',
+ '71.0.3578.90',
+ '71.0.3578.89',
+ '73.0.3633.2',
+ '73.0.3633.1',
+ '73.0.3633.0',
+ '72.0.3610.4',
+ '72.0.3626.9',
+ '71.0.3578.88',
+ '73.0.3632.5',
+ '73.0.3632.4',
+ '73.0.3632.3',
+ '73.0.3632.2',
+ '73.0.3632.1',
+ '73.0.3632.0',
+ '72.0.3626.8',
+ '71.0.3578.87',
+ '73.0.3631.2',
+ '73.0.3631.1',
+ '73.0.3631.0',
+ '72.0.3626.7',
+ '71.0.3578.86',
+ '72.0.3626.6',
+ '73.0.3630.1',
+ '73.0.3630.0',
+ '72.0.3626.5',
+ '71.0.3578.85',
+ '72.0.3626.4',
+ '73.0.3628.3',
+ '73.0.3628.2',
+ '73.0.3629.1',
+ '73.0.3629.0',
+ '72.0.3626.3',
+ '71.0.3578.84',
+ '73.0.3628.1',
+ '73.0.3628.0',
+ '71.0.3578.83',
+ '73.0.3627.1',
+ '73.0.3627.0',
+ '72.0.3626.2',
+ '71.0.3578.82',
+ '71.0.3578.81',
+ '71.0.3578.80',
+ '72.0.3626.1',
+ '72.0.3626.0',
+ '71.0.3578.79',
+ '70.0.3538.124',
+ '71.0.3578.78',
+ '72.0.3623.4',
+ '72.0.3625.2',
+ '72.0.3625.1',
+ '72.0.3625.0',
+ '71.0.3578.77',
+ '70.0.3538.123',
+ '72.0.3624.4',
+ '72.0.3624.3',
+ '72.0.3624.2',
+ '71.0.3578.76',
+ '72.0.3624.1',
+ '72.0.3624.0',
+ '72.0.3623.3',
+ '71.0.3578.75',
+ '70.0.3538.122',
+ '71.0.3578.74',
+ '72.0.3623.2',
+ '72.0.3610.3',
+ '72.0.3623.1',
+ '72.0.3623.0',
+ '72.0.3622.3',
+ '72.0.3622.2',
+ '71.0.3578.73',
+ '70.0.3538.121',
+ '72.0.3622.1',
+ '72.0.3622.0',
+ '71.0.3578.72',
+ '70.0.3538.120',
+ '72.0.3621.1',
+ '72.0.3621.0',
+ '71.0.3578.71',
+ '70.0.3538.119',
+ '72.0.3620.1',
+ '72.0.3620.0',
+ '71.0.3578.70',
+ '70.0.3538.118',
+ '71.0.3578.69',
+ '72.0.3619.1',
+ '72.0.3619.0',
+ '71.0.3578.68',
+ '70.0.3538.117',
+ '71.0.3578.67',
+ '72.0.3618.1',
+ '72.0.3618.0',
+ '71.0.3578.66',
+ '70.0.3538.116',
+ '72.0.3617.1',
+ '72.0.3617.0',
+ '71.0.3578.65',
+ '70.0.3538.115',
+ '72.0.3602.3',
+ '71.0.3578.64',
+ '72.0.3616.1',
+ '72.0.3616.0',
+ '71.0.3578.63',
+ '70.0.3538.114',
+ '71.0.3578.62',
+ '72.0.3615.1',
+ '72.0.3615.0',
+ '71.0.3578.61',
+ '70.0.3538.113',
+ '72.0.3614.1',
+ '72.0.3614.0',
+ '71.0.3578.60',
+ '70.0.3538.112',
+ '72.0.3613.1',
+ '72.0.3613.0',
+ '71.0.3578.59',
+ '70.0.3538.111',
+ '72.0.3612.2',
+ '72.0.3612.1',
+ '72.0.3612.0',
+ '70.0.3538.110',
+ '71.0.3578.58',
+ '70.0.3538.109',
+ '72.0.3611.2',
+ '72.0.3611.1',
+ '72.0.3611.0',
+ '71.0.3578.57',
+ '70.0.3538.108',
+ '72.0.3610.2',
+ '71.0.3578.56',
+ '71.0.3578.55',
+ '72.0.3610.1',
+ '72.0.3610.0',
+ '71.0.3578.54',
+ '70.0.3538.107',
+ '71.0.3578.53',
+ '72.0.3609.3',
+ '71.0.3578.52',
+ '72.0.3609.2',
+ '71.0.3578.51',
+ '72.0.3608.5',
+ '72.0.3609.1',
+ '72.0.3609.0',
+ '71.0.3578.50',
+ '70.0.3538.106',
+ '72.0.3608.4',
+ '72.0.3608.3',
+ '72.0.3608.2',
+ '71.0.3578.49',
+ '72.0.3608.1',
+ '72.0.3608.0',
+ '70.0.3538.105',
+ '71.0.3578.48',
+ '72.0.3607.1',
+ '72.0.3607.0',
+ '71.0.3578.47',
+ '70.0.3538.104',
+ '72.0.3606.2',
+ '72.0.3606.1',
+ '72.0.3606.0',
+ '71.0.3578.46',
+ '70.0.3538.103',
+ '70.0.3538.102',
+ '72.0.3605.3',
+ '72.0.3605.2',
+ '72.0.3605.1',
+ '72.0.3605.0',
+ '71.0.3578.45',
+ '70.0.3538.101',
+ '71.0.3578.44',
+ '71.0.3578.43',
+ '70.0.3538.100',
+ '70.0.3538.99',
+ '71.0.3578.42',
+ '72.0.3604.1',
+ '72.0.3604.0',
+ '71.0.3578.41',
+ '70.0.3538.98',
+ '71.0.3578.40',
+ '72.0.3603.2',
+ '72.0.3603.1',
+ '72.0.3603.0',
+ '71.0.3578.39',
+ '70.0.3538.97',
+ '72.0.3602.2',
+ '71.0.3578.38',
+ '71.0.3578.37',
+ '72.0.3602.1',
+ '72.0.3602.0',
+ '71.0.3578.36',
+ '70.0.3538.96',
+ '72.0.3601.1',
+ '72.0.3601.0',
+ '71.0.3578.35',
+ '70.0.3538.95',
+ '72.0.3600.1',
+ '72.0.3600.0',
+ '71.0.3578.34',
+ '70.0.3538.94',
+ '72.0.3599.3',
+ '72.0.3599.2',
+ '72.0.3599.1',
+ '72.0.3599.0',
+ '71.0.3578.33',
+ '70.0.3538.93',
+ '72.0.3598.1',
+ '72.0.3598.0',
+ '71.0.3578.32',
+ '70.0.3538.87',
+ '72.0.3597.1',
+ '72.0.3597.0',
+ '72.0.3596.2',
+ '71.0.3578.31',
+ '70.0.3538.86',
+ '71.0.3578.30',
+ '71.0.3578.29',
+ '72.0.3596.1',
+ '72.0.3596.0',
+ '71.0.3578.28',
+ '70.0.3538.85',
+ '72.0.3595.2',
+ '72.0.3591.3',
+ '72.0.3595.1',
+ '72.0.3595.0',
+ '71.0.3578.27',
+ '70.0.3538.84',
+ '72.0.3594.1',
+ '72.0.3594.0',
+ '71.0.3578.26',
+ '70.0.3538.83',
+ '72.0.3593.2',
+ '72.0.3593.1',
+ '72.0.3593.0',
+ '71.0.3578.25',
+ '70.0.3538.82',
+ '72.0.3589.3',
+ '72.0.3592.2',
+ '72.0.3592.1',
+ '72.0.3592.0',
+ '71.0.3578.24',
+ '72.0.3589.2',
+ '70.0.3538.81',
+ '70.0.3538.80',
+ '72.0.3591.2',
+ '72.0.3591.1',
+ '72.0.3591.0',
+ '71.0.3578.23',
+ '70.0.3538.79',
+ '71.0.3578.22',
+ '72.0.3590.1',
+ '72.0.3590.0',
+ '71.0.3578.21',
+ '70.0.3538.78',
+ '70.0.3538.77',
+ '72.0.3589.1',
+ '72.0.3589.0',
+ '71.0.3578.20',
+ '70.0.3538.76',
+ '71.0.3578.19',
+ '70.0.3538.75',
+ '72.0.3588.1',
+ '72.0.3588.0',
+ '71.0.3578.18',
+ '70.0.3538.74',
+ '72.0.3586.2',
+ '72.0.3587.0',
+ '71.0.3578.17',
+ '70.0.3538.73',
+ '72.0.3586.1',
+ '72.0.3586.0',
+ '71.0.3578.16',
+ '70.0.3538.72',
+ '72.0.3585.1',
+ '72.0.3585.0',
+ '71.0.3578.15',
+ '70.0.3538.71',
+ '71.0.3578.14',
+ '72.0.3584.1',
+ '72.0.3584.0',
+ '71.0.3578.13',
+ '70.0.3538.70',
+ '72.0.3583.2',
+ '71.0.3578.12',
+ '72.0.3583.1',
+ '72.0.3583.0',
+ '71.0.3578.11',
+ '70.0.3538.69',
+ '71.0.3578.10',
+ '72.0.3582.0',
+ '72.0.3581.4',
+ '71.0.3578.9',
+ '70.0.3538.67',
+ '72.0.3581.3',
+ '72.0.3581.2',
+ '72.0.3581.1',
+ '72.0.3581.0',
+ '71.0.3578.8',
+ '70.0.3538.66',
+ '72.0.3580.1',
+ '72.0.3580.0',
+ '71.0.3578.7',
+ '70.0.3538.65',
+ '71.0.3578.6',
+ '72.0.3579.1',
+ '72.0.3579.0',
+ '71.0.3578.5',
+ '70.0.3538.64',
+ '71.0.3578.4',
+ '71.0.3578.3',
+ '71.0.3578.2',
+ '71.0.3578.1',
+ '71.0.3578.0',
+ '70.0.3538.63',
+ '69.0.3497.128',
+ '70.0.3538.62',
+ '70.0.3538.61',
+ '70.0.3538.60',
+ '70.0.3538.59',
+ '71.0.3577.1',
+ '71.0.3577.0',
+ '70.0.3538.58',
+ '69.0.3497.127',
+ '71.0.3576.2',
+ '71.0.3576.1',
+ '71.0.3576.0',
+ '70.0.3538.57',
+ '70.0.3538.56',
+ '71.0.3575.2',
+ '70.0.3538.55',
+ '69.0.3497.126',
+ '70.0.3538.54',
+ '71.0.3575.1',
+ '71.0.3575.0',
+ '71.0.3574.1',
+ '71.0.3574.0',
+ '70.0.3538.53',
+ '69.0.3497.125',
+ '70.0.3538.52',
+ '71.0.3573.1',
+ '71.0.3573.0',
+ '70.0.3538.51',
+ '69.0.3497.124',
+ '71.0.3572.1',
+ '71.0.3572.0',
+ '70.0.3538.50',
+ '69.0.3497.123',
+ '71.0.3571.2',
+ '70.0.3538.49',
+ '69.0.3497.122',
+ '71.0.3571.1',
+ '71.0.3571.0',
+ '70.0.3538.48',
+ '69.0.3497.121',
+ '71.0.3570.1',
+ '71.0.3570.0',
+ '70.0.3538.47',
+ '69.0.3497.120',
+ '71.0.3568.2',
+ '71.0.3569.1',
+ '71.0.3569.0',
+ '70.0.3538.46',
+ '69.0.3497.119',
+ '70.0.3538.45',
+ '71.0.3568.1',
+ '71.0.3568.0',
+ '70.0.3538.44',
+ '69.0.3497.118',
+ '70.0.3538.43',
+ '70.0.3538.42',
+ '71.0.3567.1',
+ '71.0.3567.0',
+ '70.0.3538.41',
+ '69.0.3497.117',
+ '71.0.3566.1',
+ '71.0.3566.0',
+ '70.0.3538.40',
+ '69.0.3497.116',
+ '71.0.3565.1',
+ '71.0.3565.0',
+ '70.0.3538.39',
+ '69.0.3497.115',
+ '71.0.3564.1',
+ '71.0.3564.0',
+ '70.0.3538.38',
+ '69.0.3497.114',
+ '71.0.3563.0',
+ '71.0.3562.2',
+ '70.0.3538.37',
+ '69.0.3497.113',
+ '70.0.3538.36',
+ '70.0.3538.35',
+ '71.0.3562.1',
+ '71.0.3562.0',
+ '70.0.3538.34',
+ '69.0.3497.112',
+ '70.0.3538.33',
+ '71.0.3561.1',
+ '71.0.3561.0',
+ '70.0.3538.32',
+ '69.0.3497.111',
+ '71.0.3559.6',
+ '71.0.3560.1',
+ '71.0.3560.0',
+ '71.0.3559.5',
+ '71.0.3559.4',
+ '70.0.3538.31',
+ '69.0.3497.110',
+ '71.0.3559.3',
+ '70.0.3538.30',
+ '69.0.3497.109',
+ '71.0.3559.2',
+ '71.0.3559.1',
+ '71.0.3559.0',
+ '70.0.3538.29',
+ '69.0.3497.108',
+ '71.0.3558.2',
+ '71.0.3558.1',
+ '71.0.3558.0',
+ '70.0.3538.28',
+ '69.0.3497.107',
+ '71.0.3557.2',
+ '71.0.3557.1',
+ '71.0.3557.0',
+ '70.0.3538.27',
+ '69.0.3497.106',
+ '71.0.3554.4',
+ '70.0.3538.26',
+ '71.0.3556.1',
+ '71.0.3556.0',
+ '70.0.3538.25',
+ '71.0.3554.3',
+ '69.0.3497.105',
+ '71.0.3554.2',
+ '70.0.3538.24',
+ '69.0.3497.104',
+ '71.0.3555.2',
+ '70.0.3538.23',
+ '71.0.3555.1',
+ '71.0.3555.0',
+ '70.0.3538.22',
+ '69.0.3497.103',
+ '71.0.3554.1',
+ '71.0.3554.0',
+ '70.0.3538.21',
+ '69.0.3497.102',
+ '71.0.3553.3',
+ '70.0.3538.20',
+ '69.0.3497.101',
+ '71.0.3553.2',
+ '69.0.3497.100',
+ '71.0.3553.1',
+ '71.0.3553.0',
+ '70.0.3538.19',
+ '69.0.3497.99',
+ '69.0.3497.98',
+ '69.0.3497.97',
+ '71.0.3552.6',
+ '71.0.3552.5',
+ '71.0.3552.4',
+ '71.0.3552.3',
+ '71.0.3552.2',
+ '71.0.3552.1',
+ '71.0.3552.0',
+ '70.0.3538.18',
+ '69.0.3497.96',
+ '71.0.3551.3',
+ '71.0.3551.2',
+ '71.0.3551.1',
+ '71.0.3551.0',
+ '70.0.3538.17',
+ '69.0.3497.95',
+ '71.0.3550.3',
+ '71.0.3550.2',
+ '71.0.3550.1',
+ '71.0.3550.0',
+ '70.0.3538.16',
+ '69.0.3497.94',
+ '71.0.3549.1',
+ '71.0.3549.0',
+ '70.0.3538.15',
+ '69.0.3497.93',
+ '69.0.3497.92',
+ '71.0.3548.1',
+ '71.0.3548.0',
+ '70.0.3538.14',
+ '69.0.3497.91',
+ '71.0.3547.1',
+ '71.0.3547.0',
+ '70.0.3538.13',
+ '69.0.3497.90',
+ '71.0.3546.2',
+ '69.0.3497.89',
+ '71.0.3546.1',
+ '71.0.3546.0',
+ '70.0.3538.12',
+ '69.0.3497.88',
+ '71.0.3545.4',
+ '71.0.3545.3',
+ '71.0.3545.2',
+ '71.0.3545.1',
+ '71.0.3545.0',
+ '70.0.3538.11',
+ '69.0.3497.87',
+ '71.0.3544.5',
+ '71.0.3544.4',
+ '71.0.3544.3',
+ '71.0.3544.2',
+ '71.0.3544.1',
+ '71.0.3544.0',
+ '69.0.3497.86',
+ '70.0.3538.10',
+ '69.0.3497.85',
+ '70.0.3538.9',
+ '69.0.3497.84',
+ '71.0.3543.4',
+ '70.0.3538.8',
+ '71.0.3543.3',
+ '71.0.3543.2',
+ '71.0.3543.1',
+ '71.0.3543.0',
+ '70.0.3538.7',
+ '69.0.3497.83',
+ '71.0.3542.2',
+ '71.0.3542.1',
+ '71.0.3542.0',
+ '70.0.3538.6',
+ '69.0.3497.82',
+ '69.0.3497.81',
+ '71.0.3541.1',
+ '71.0.3541.0',
+ '70.0.3538.5',
+ '69.0.3497.80',
+ '71.0.3540.1',
+ '71.0.3540.0',
+ '70.0.3538.4',
+ '69.0.3497.79',
+ '70.0.3538.3',
+ '71.0.3539.1',
+ '71.0.3539.0',
+ '69.0.3497.78',
+ '68.0.3440.134',
+ '69.0.3497.77',
+ '70.0.3538.2',
+ '70.0.3538.1',
+ '70.0.3538.0',
+ '69.0.3497.76',
+ '68.0.3440.133',
+ '69.0.3497.75',
+ '70.0.3537.2',
+ '70.0.3537.1',
+ '70.0.3537.0',
+ '69.0.3497.74',
+ '68.0.3440.132',
+ '70.0.3536.0',
+ '70.0.3535.5',
+ '70.0.3535.4',
+ '70.0.3535.3',
+ '69.0.3497.73',
+ '68.0.3440.131',
+ '70.0.3532.8',
+ '70.0.3532.7',
+ '69.0.3497.72',
+ '69.0.3497.71',
+ '70.0.3535.2',
+ '70.0.3535.1',
+ '70.0.3535.0',
+ '69.0.3497.70',
+ '68.0.3440.130',
+ '69.0.3497.69',
+ '68.0.3440.129',
+ '70.0.3534.4',
+ '70.0.3534.3',
+ '70.0.3534.2',
+ '70.0.3534.1',
+ '70.0.3534.0',
+ '69.0.3497.68',
+ '68.0.3440.128',
+ '70.0.3533.2',
+ '70.0.3533.1',
+ '70.0.3533.0',
+ '69.0.3497.67',
+ '68.0.3440.127',
+ '70.0.3532.6',
+ '70.0.3532.5',
+ '70.0.3532.4',
+ '69.0.3497.66',
+ '68.0.3440.126',
+ '70.0.3532.3',
+ '70.0.3532.2',
+ '70.0.3532.1',
+ '69.0.3497.60',
+ '69.0.3497.65',
+ '69.0.3497.64',
+ '70.0.3532.0',
+ '70.0.3531.0',
+ '70.0.3530.4',
+ '70.0.3530.3',
+ '70.0.3530.2',
+ '69.0.3497.58',
+ '68.0.3440.125',
+ '69.0.3497.57',
+ '69.0.3497.56',
+ '69.0.3497.55',
+ '69.0.3497.54',
+ '70.0.3530.1',
+ '70.0.3530.0',
+ '69.0.3497.53',
+ '68.0.3440.124',
+ '69.0.3497.52',
+ '70.0.3529.3',
+ '70.0.3529.2',
+ '70.0.3529.1',
+ '70.0.3529.0',
+ '69.0.3497.51',
+ '70.0.3528.4',
+ '68.0.3440.123',
+ '70.0.3528.3',
+ '70.0.3528.2',
+ '70.0.3528.1',
+ '70.0.3528.0',
+ '69.0.3497.50',
+ '68.0.3440.122',
+ '70.0.3527.1',
+ '70.0.3527.0',
+ '69.0.3497.49',
+ '68.0.3440.121',
+ '70.0.3526.1',
+ '70.0.3526.0',
+ '68.0.3440.120',
+ '69.0.3497.48',
+ '69.0.3497.47',
+ '68.0.3440.119',
+ '68.0.3440.118',
+ '70.0.3525.5',
+ '70.0.3525.4',
+ '70.0.3525.3',
+ '68.0.3440.117',
+ '69.0.3497.46',
+ '70.0.3525.2',
+ '70.0.3525.1',
+ '70.0.3525.0',
+ '69.0.3497.45',
+ '68.0.3440.116',
+ '70.0.3524.4',
+ '70.0.3524.3',
+ '69.0.3497.44',
+ '70.0.3524.2',
+ '70.0.3524.1',
+ '70.0.3524.0',
+ '70.0.3523.2',
+ '69.0.3497.43',
+ '68.0.3440.115',
+ '70.0.3505.9',
+ '69.0.3497.42',
+ '70.0.3505.8',
+ '70.0.3523.1',
+ '70.0.3523.0',
+ '69.0.3497.41',
+ '68.0.3440.114',
+ '70.0.3505.7',
+ '69.0.3497.40',
+ '70.0.3522.1',
+ '70.0.3522.0',
+ '70.0.3521.2',
+ '69.0.3497.39',
+ '68.0.3440.113',
+ '70.0.3505.6',
+ '70.0.3521.1',
+ '70.0.3521.0',
+ '69.0.3497.38',
+ '68.0.3440.112',
+ '70.0.3520.1',
+ '70.0.3520.0',
+ '69.0.3497.37',
+ '68.0.3440.111',
+ '70.0.3519.3',
+ '70.0.3519.2',
+ '70.0.3519.1',
+ '70.0.3519.0',
+ '69.0.3497.36',
+ '68.0.3440.110',
+ '70.0.3518.1',
+ '70.0.3518.0',
+ '69.0.3497.35',
+ '69.0.3497.34',
+ '68.0.3440.109',
+ '70.0.3517.1',
+ '70.0.3517.0',
+ '69.0.3497.33',
+ '68.0.3440.108',
+ '69.0.3497.32',
+ '70.0.3516.3',
+ '70.0.3516.2',
+ '70.0.3516.1',
+ '70.0.3516.0',
+ '69.0.3497.31',
+ '68.0.3440.107',
+ '70.0.3515.4',
+ '68.0.3440.106',
+ '70.0.3515.3',
+ '70.0.3515.2',
+ '70.0.3515.1',
+ '70.0.3515.0',
+ '69.0.3497.30',
+ '68.0.3440.105',
+ '68.0.3440.104',
+ '70.0.3514.2',
+ '70.0.3514.1',
+ '70.0.3514.0',
+ '69.0.3497.29',
+ '68.0.3440.103',
+ '70.0.3513.1',
+ '70.0.3513.0',
+ '69.0.3497.28',
+ )
+ return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+
+
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
+ # random_user_agent() is evaluated once, when this module-level dict is built,
+ # so the User-Agent is chosen per import of the module, not per lookup.
+ 'User-Agent': random_user_agent(),
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
}
+# Alternative User-Agent strings, keyed by browser name.
+USER_AGENTS = {
+ 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
NO_DEFAULT = object()
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
+# Month names per language code, January first (list index + 1 is the month number).
+# The accented French names are real non-ASCII text; they rely on the utf-8 coding
+# declaration and unicode_literals import at the top of this file.
+MONTH_NAMES = {
+ 'en': ENGLISH_MONTH_NAMES,
+ 'fr': [
+ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+ 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
+# File extensions, written without the leading dot, grouped one container/codec family per line.
+# NOTE(review): presumably the set of extensions the rest of the module accepts as
+# media or manifest file types — the consumers are not visible here; confirm.
+KNOWN_EXTENSIONS = (
+ 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
+ 'flv', 'f4v', 'f4a', 'f4b',
+ 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
+ 'mkv', 'mka', 'mk3d',
+ 'avi', 'divx',
+ 'mov',
+ 'asf', 'wmv', 'wma',
+ '3gp', '3g2',
+ 'mp3',
+ 'flac',
+ 'ape',
+ 'wav',
+ 'f4f', 'f4m', 'm3u8', 'smil')
+
+# needed for sanitizing filenames in restricted mode
+# Maps each accented character to its ASCII replacement. zip() pairs the two
+# sequences positionally, so the 68 characters of the first string must stay
+# aligned with the 68 items yielded by the chain (multi-letter replacements
+# such as 'AE' or 'ss' are passed as one-element lists to count as one item).
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
+
+# Date/time patterns written with strftime/strptime-style % directives.
+# This base tuple holds the patterns whose day/month order is unambiguous;
+# the two lists below extend a copy of it with the ambiguous numeric forms.
+DATE_FORMATS = (
+ '%d %B %Y',
+ '%d %b %Y',
+ '%B %d %Y',
+ '%B %dst %Y',
+ '%B %dnd %Y',
+ '%B %drd %Y',
+ '%B %dth %Y',
+ '%b %d %Y',
+ '%b %dst %Y',
+ '%b %dnd %Y',
+ '%b %drd %Y',
+ '%b %dth %Y',
+ '%b %dst %Y %I:%M',
+ '%b %dnd %Y %I:%M',
+ '%b %drd %Y %I:%M',
+ '%b %dth %Y %I:%M',
+ '%Y %m %d',
+ '%Y-%m-%d',
+ '%Y/%m/%d',
+ '%Y/%m/%d %H:%M',
+ '%Y/%m/%d %H:%M:%S',
+ '%Y-%m-%d %H:%M',
+ '%Y-%m-%d %H:%M:%S',
+ '%Y-%m-%d %H:%M:%S.%f',
+ '%d.%m.%Y %H:%M',
+ '%d.%m.%Y %H.%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ '%Y-%m-%dT%H:%M:%S.%fZ',
+ '%Y-%m-%dT%H:%M:%S.%f0Z',
+ '%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S.%f',
+ '%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
+ '%B %d %Y at %H:%M',
+ '%B %d %Y at %H:%M:%S',
+)
+
+# DATE_FORMATS plus numeric patterns that read the day before the month.
+DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_DAY_FIRST.extend([
+ '%d-%m-%Y',
+ '%d.%m.%Y',
+ '%d.%m.%y',
+ '%d/%m/%Y',
+ '%d/%m/%y',
+ '%d/%m/%Y %H:%M:%S',
+])
+
+# DATE_FORMATS plus numeric patterns that read the month before the day.
+DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_MONTH_FIRST.extend([
+ '%m-%d-%Y',
+ '%m.%d.%Y',
+ '%m/%d/%Y',
+ '%m/%d/%y',
+ '%m/%d/%Y %H:%M:%S',
+])
+
+# Matches the tail of a call shaped like }('<payload>',<int>,<int>,'<a|b|c>'.split('|')
+# and captures those four arguments in order.
+# NOTE(review): presumably the argument list of packer-obfuscated JavaScript — confirm against callers.
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+# Matches a <script> element whose type attribute is application/ld+json (optionally
+# quoted), case-insensitively and across newlines; the script body is the named group json_ld.
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+
def preferredencoding():
"""Get preferred encoding.
if sys.version_info >= (2, 7):
- def find_xpath_attr(node, xpath, key, val):
+ def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """
- assert re.match(r'^[a-zA-Z-]+$', key)
- assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
- expr = xpath + "[@%s='%s']" % (key, val)
+ assert re.match(r'^[a-zA-Z_-]+$', key)
+ expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
return node.find(expr)
else:
- def find_xpath_attr(node, xpath, key, val):
- # Here comes the crazy part: In 2.6, if the xpath is a unicode,
- # .//node does not match if a node is a direct child of . !
- if isinstance(xpath, compat_str):
- xpath = xpath.encode('ascii')
-
- for f in node.findall(xpath):
- if f.attrib.get(key) == val:
+ def find_xpath_attr(node, xpath, key, val=None):
+ for f in node.findall(compat_xpath(xpath)):
+ if key not in f.attrib:
+ continue
+ if val is None or f.attrib.get(key) == val:
return f
return None
return '/'.join(replaced)
-def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
- if sys.version_info < (2, 7): # Crazy 2.6
- xpath = xpath.encode('ascii')
+def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ def _find_xpath(xpath):
+ return node.find(compat_xpath(xpath))
+
+ if isinstance(xpath, (str, compat_str)):
+ n = _find_xpath(xpath)
+ else:
+ for xp in xpath:
+ n = _find_xpath(xp)
+ if n is not None:
+ break
- n = node.find(xpath)
- if n is None or n.text is None:
+ if n is None:
if default is not NO_DEFAULT:
return default
elif fatal:
raise ExtractorError('Could not find XML element %s' % name)
else:
return None
+ return n
+
+
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ """Return the text of the element found by xpath_element().
+
+ When the element is missing, xpath_element() decides the outcome (default,
+ ExtractorError if fatal, or None). When the element exists but has no
+ text, the same default / fatal / None policy is applied here.
+ """
+ n = xpath_element(node, xpath, name, fatal=fatal, default=default)
+ # Pass through "not found" results (None or the caller-supplied default)
+ if n is None or n == default:
+ return n
+ if n.text is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ # Fall back to the xpath itself for the error message
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element\'s text %s' % name)
+ else:
+ return None
return n.text
+def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
+ """Return the value of attribute `key` on the element located by
+ find_xpath_attr(node, xpath, key).
+
+ If no such element is found: return `default` when one was given, raise
+ ExtractorError when `fatal` is true, otherwise return None.
+ """
+ n = find_xpath_attr(node, xpath, key)
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ # Build a readable "xpath[@key]" label when no name was supplied
+ name = '%s[@%s]' % (xpath, key) if name is None else name
+ raise ExtractorError('Could not find XML attribute %s' % name)
+ else:
+ return None
+ return n.attrib[key]
+
+
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
- return get_element_by_attribute("id", id, html)
+ return get_element_by_attribute('id', id, html)
+
+
+def get_element_by_class(class_name, html):
+ """Return the content of the first tag with the specified class in the passed HTML document"""
+ retval = get_elements_by_class(class_name, html)
+ # None (not an empty list) signals that no tag matched
+ return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, escape_value=True):
+ """Return the first result of get_elements_by_attribute() for the same
+ arguments, or None when that list is empty"""
+ retval = get_elements_by_attribute(attribute, value, html, escape_value)
+ return retval[0] if retval else None
+
+def get_elements_by_class(class_name, html):
+ """Return the content of all tags with the specified class in the passed HTML document as a list"""
+ # The value is passed as a ready-made regex (escape_value=False): the
+ # escaped class name delimited by \b, with any other non-quote characters
+ # allowed around it inside the attribute value.
+ return get_elements_by_attribute(
+ 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ html, escape_value=False)
-def get_element_by_attribute(attribute, value, html):
+
+def get_elements_by_attribute(attribute, value, html, escape_value=True):
"""Return the content of the tag with the specified attribute in the passed HTML document"""
- m = re.search(r'''(?xs)
+ value = re.escape(value) if escape_value else value
+
+ retlist = []
+ for m in re.finditer(r'''(?xs)
<([a-zA-Z0-9:._-]+)
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s+%s=['"]?%s['"]?
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s*>
(?P<content>.*?)
</\1>
- ''' % (re.escape(attribute), re.escape(value)), html)
-
- if not m:
- return None
- res = m.group('content')
-
- if res.startswith('"') or res.startswith("'"):
- res = res[1:-1]
-
- return unescapeHTML(res)
+ ''' % (re.escape(attribute), value), html):
+ res = m.group('content')
+
+ if res.startswith('"') or res.startswith("'"):
+ res = res[1:-1]
+
+ retlist.append(unescapeHTML(res))
+
+ return retlist
+
+
+class HTMLAttributeParser(compat_HTMLParser):
+ """Trivial HTML parser to gather the attributes for a single element"""
+ def __init__(self):
+ # Stays an empty dict if no start tag is ever parsed
+ self.attrs = {}
+ compat_HTMLParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ # Replaces (does not merge) the stored attributes on every start tag,
+ # so after feeding several tags only the last one is kept
+ self.attrs = dict(attrs)
+
+
+def extract_attributes(html_element):
+ """Given a string for an HTML element such as
+ <el
+ a="foo" B="bar" c="&98;az" d=boz
+ empty= noval entity="&amp;"
+ sq='"' dq="'"
+ >
+ Decode and return a dictionary of attributes.
+ {
+ 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
+ 'empty': '', 'noval': None, 'entity': '&',
+ 'sq': '"', 'dq': '\''
+ }.
+ NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+ but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+ """
+ parser = HTMLAttributeParser()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ # Best effort: return whatever attributes were collected before the error
+ pass
+ return parser.attrs
def clean_html(html):
# Newline vs <br />
html = html.replace('\n', ' ')
- html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
- html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
+ html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
# Strip html tags
html = re.sub('<.*?>', '', html)
# Replace html entities
def sanitize_filename(s, restricted=False, is_id=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
- Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
+ Set is_id if this is not an arbitrary string, but an ID that should be kept
+ if possible.
"""
def replace_insane(char):
+ if restricted and char in ACCENT_CHARS:
+ return ACCENT_CHARS[char]
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
if drive_or_unc:
norm_path.pop(0)
sanitized_path = [
- path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
for path_part in norm_path]
if drive_or_unc:
sanitized_path.insert(0, drive_or_unc + os.path.sep)
return os.path.join(*sanitized_path)
+def sanitize_url(url):
+ """Return `url` with a missing scheme or a known scheme typo repaired.
+
+ Protocol-relative URLs ('//host/...') get 'http:' prepended; otherwise the
+ first matching entry of COMMON_TYPOS is applied. URLs matching neither
+ rule are returned unchanged.
+ """
+ # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+ # the number of unwanted failures due to missing protocol
+ if url.startswith('//'):
+ return 'http:%s' % url
+ # Fix some common typos seen so far
+ COMMON_TYPOS = (
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
+ (r'^httpss://', r'https://'),
+ # https://bx1.be/lives/direct-tv/
+ (r'^rmtp([es]?)://', r'rtmp\1://'),
+ )
+ for mistake, fixup in COMMON_TYPOS:
+ if re.match(mistake, url):
+ return re.sub(mistake, fixup, url)
+ return url
+
+
+def sanitized_Request(url, *args, **kwargs):
+ """Create a compat_urllib_request.Request for `url` after passing it
+ through sanitize_url(); all other arguments are forwarded unchanged"""
+ return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+
+
+def expand_path(s):
+ """Expand shell variables and ~"""
+ # "~" is expanded first (compat_expanduser), then environment variables
+ return os.path.expandvars(compat_expanduser(s))
+
+
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
return res
-def _htmlentity_transform(entity):
+def _htmlentity_transform(entity_with_semicolon):
"""Transforms an HTML entity to a character."""
+ entity = entity_with_semicolon[:-1]
+
# Known non-numeric HTML entity
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
+ # TODO: HTML5 allows entities without a semicolon. For example,
+ # '&Eacute;ric' should be decoded as 'Éric'.
+ if entity_with_semicolon in compat_html_entities_html5:
+ return compat_html_entities_html5[entity_with_semicolon]
+
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
numstr = '0%s' % numstr
else:
base = 10
- return compat_chr(int(numstr, base))
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
+ try:
+ return compat_chr(int(numstr, base))
+ except ValueError:
+ pass
# Unknown entity in name, return its literal representation
- return ('&%s;' % entity)
+ return '&%s;' % entity
def unescapeHTML(s):
assert type(s) == compat_str
return re.sub(
- r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def get_subprocess_encoding():
if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
return s
+ # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
+ if sys.platform.startswith('java'):
+ return s
+
return s.encode(get_subprocess_encoding(), 'ignore')
return msg
-class ExtractorError(Exception):
+class YoutubeDLError(Exception):
+ """Base exception for YoutubeDL errors."""
+ # Common parent of the exception classes defined in this module
+ # (ExtractorError, DownloadError, SameFileError, ...)
+ pass
+
+
+class ExtractorError(YoutubeDLError):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
pass
-class DownloadError(Exception):
+class GeoRestrictedError(ExtractorError):
+ """Geographic restriction Error exception.
+
+ This exception may be thrown when a video is not available from your
+ geographic location due to geographic restrictions imposed by a website.
+ """
+ def __init__(self, msg, countries=None):
+ # Always constructed as an "expected" ExtractorError
+ super(GeoRestrictedError, self).__init__(msg, expected=True)
+ # Re-assign msg after the parent constructor has run
+ # NOTE(review): presumably the parent rewrites self.msg - confirm
+ self.msg = msg
+ # Stored as given; None when the caller supplies no country list
+ self.countries = countries
+
+
+class DownloadError(YoutubeDLError):
"""Download Error exception.
This exception may be thrown by FileDownloader objects if they are not
self.exc_info = exc_info
-class SameFileError(Exception):
+class SameFileError(YoutubeDLError):
"""Same File exception.
This exception will be thrown by FileDownloader objects if they detect
pass
-class PostProcessingError(Exception):
+class PostProcessingError(YoutubeDLError):
"""Post Processing exception.
This exception may be raised by PostProcessor's .run() method to
"""
def __init__(self, msg):
+ super(PostProcessingError, self).__init__(msg)
self.msg = msg
-class MaxDownloadsReached(Exception):
+class MaxDownloadsReached(YoutubeDLError):
""" --max-downloads limit has been reached. """
pass
-class UnavailableVideoError(Exception):
+class UnavailableVideoError(YoutubeDLError):
"""Unavailable Format exception.
This exception will be thrown when a video is requested
pass
-class ContentTooShortError(Exception):
+class ContentTooShortError(YoutubeDLError):
"""Content Too Short exception.
This exception may be raised by FileDownloader objects when a file they
download is too small for what the server announced first, indicating
the connection was probably interrupted.
"""
- # Both in bytes
- downloaded = None
- expected = None
def __init__(self, downloaded, expected):
+ super(ContentTooShortError, self).__init__(
+ 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
+ )
+ # Both in bytes
self.downloaded = downloaded
self.expected = expected
+class XAttrMetadataError(YoutubeDLError):
+    """Error carrying an errno-style code and message, classified by reason.
+
+    `code` and `msg` are stored on the instance and inspected to set
+    `self.reason` to one of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
+    """
+
+    def __init__(self, code=None, msg='Unknown error'):
+        super(XAttrMetadataError, self).__init__(msg)
+        self.code = code
+        self.msg = msg
+
+        # Parsing code and msg
+        # The OS message is spelled "Disk quota exceeded"; the misspelled
+        # variant is still accepted so existing callers keep working.
+        if (self.code in (errno.ENOSPC, errno.EDQUOT)
+                or 'No space left' in self.msg
+                or 'Disk quota exceeded' in self.msg
+                or 'Disk quota excedded' in self.msg):
+            self.reason = 'NO_SPACE'
+        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+            self.reason = 'VALUE_TOO_LONG'
+        else:
+            self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(YoutubeDLError):
+ # NOTE(review): presumably raised when no xattr implementation is
+ # available on the platform - confirm against the callers
+ pass
+
+
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
- hc = http_class(*args, **kwargs)
+ # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
+ # expected HTTP responses to meet HTTP/1.0 or later (see also
+ # https://github.com/ytdl-org/youtube-dl/issues/6727)
+ if sys.version_info < (3, 0):
+ kwargs['strict'] = True
+ hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address')
+
if source_address is not None:
+ # This is to workaround _create_connection() from socket where it will try all
+ # address data from getaddrinfo() including IPv6. This filters the result from
+ # getaddrinfo() based on the source_address value.
+ # This is based on the cpython socket.create_connection() function.
+ # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
+ def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+ host, port = address
+ err = None
+ addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+ af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
+ ip_addrs = [addr for addr in addrs if addr[0] == af]
+ if addrs and not ip_addrs:
+ ip_version = 'v4' if af == socket.AF_INET else 'v6'
+ raise socket.error(
+ "No remote IP%s addresses available for connect, can't use '%s' as source address"
+ % (ip_version, source_address[0]))
+ for res in ip_addrs:
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ sock.bind(source_address)
+ sock.connect(sa)
+ err = None # Explicitly break reference cycle
+ return sock
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ if err is not None:
+ raise err
+ else:
+ raise socket.error('getaddrinfo returns an empty list')
+ if hasattr(hc, '_create_connection'):
+ hc._create_connection = _create_connection
sa = (source_address, 0)
if hasattr(hc, 'source_address'): # Python 2.7+
hc.source_address = sa
else: # Python 2.6
def _hc_connect(self, *args, **kwargs):
- sock = compat_socket_create_connection(
+ sock = _create_connection(
(self.host, self.port), self.timeout, sa)
if is_https:
self.sock = ssl.wrap_socket(
return hc
+def handle_youtubedl_headers(headers):
+ """Apply the internal 'Youtubedl-no-compression' marker header.
+
+ Without the marker the passed mapping itself is returned. With it, a new
+ dict is returned that omits both the marker and any 'Accept-Encoding'
+ header (compared case-insensitively); the input mapping is not modified.
+ """
+ filtered_headers = headers
+
+ if 'Youtubedl-no-compression' in filtered_headers:
+ filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+ del filtered_headers['Youtubedl-no-compression']
+
+ return filtered_headers
+
+
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
"""Handler for HTTP requests and responses.
the standard headers to every HTTP request and handles gzipped and
deflated responses from web servers. If compression is to be avoided in
a particular request, the original request in the program code only has
- to include the HTTP header "Youtubedl-No-Compression", which will be
+ to include the HTTP header "Youtubedl-no-compression", which will be
removed before making the real request.
Part of this code was copied from:
self._params = params
def http_open(self, req):
+ conn_class = compat_http_client.HTTPConnection
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
return self.do_open(functools.partial(
- _create_http_connection, self, compat_http_client.HTTPConnection, False),
+ _create_http_connection, self, conn_class, False),
req)
@staticmethod
except zlib.error:
return zlib.decompress(data)
- @staticmethod
- def addinfourl_wrapper(stream, headers, url, code):
- if hasattr(compat_urllib_request.addinfourl, 'getcode'):
- return compat_urllib_request.addinfourl(stream, headers, url, code)
- ret = compat_urllib_request.addinfourl(stream, headers, url)
- ret.code = code
- return ret
-
def http_request(self, req):
+ # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
+ # always respected by websites, some tend to give out URLs with non percent-encoded
+ # non-ASCII characters (see telemb.py, ard.py [#3412])
+ # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+ # To work around aforementioned issue we will replace request's original URL with
+ # percent-encoded one
+ # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+ # the code of this workaround has been moved here from YoutubeDL.urlopen()
+ url = req.get_full_url()
+ url_escaped = escape_url(url)
+
+ # Substitute URL if any change after escaping
+ if url != url_escaped:
+ req = update_Request(req, url=url_escaped)
+
for h, v in std_headers.items():
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
# The dict keys are capitalized because of this bug by urllib
if h.capitalize() not in req.headers:
req.add_header(h, v)
- if 'Youtubedl-no-compression' in req.headers:
- if 'Accept-encoding' in req.headers:
- del req.headers['Accept-encoding']
- del req.headers['Youtubedl-no-compression']
+
+ req.headers = handle_youtubedl_headers(req.headers)
if sys.version_info < (2, 7) and '#' in req.get_full_url():
# Python 2.6 is brain-dead when it comes to fragments
break
else:
raise original_ioerror
- resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
+ if 300 <= resp.code < 400:
+ location = resp.headers.get('Location')
+ if location:
+ # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
+ if sys.version_info >= (3, 0):
+ location = location.encode('iso-8859-1').decode('utf-8')
+ else:
+ location = location.decode('utf-8')
+ location_escaped = escape_url(location)
+ if location != location_escaped:
+ del resp.headers['Location']
+ if sys.version_info < (3, 0):
+ location_escaped = location_escaped.encode('utf-8')
+ resp.headers['Location'] = location_escaped
return resp
https_request = http_request
https_response = http_response
+def make_socks_conn_class(base_class, socks_proxy):
+    """Return a subclass of `base_class` that connects through a SOCKS proxy.
+
+    `base_class` must be an HTTPConnection or HTTPSConnection class.
+    `socks_proxy` is a proxy URL whose scheme selects the protocol
+    ('socks5', 'socks4a', or 'socks'/'socks4'); the port defaults to 1080 and
+    username/password, when present, are percent-decoded.
+
+    Raises ValueError for any other scheme.
+    """
+    assert issubclass(base_class, (
+        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+
+    url_components = compat_urlparse.urlparse(socks_proxy)
+    scheme = url_components.scheme.lower()
+    if scheme == 'socks5':
+        socks_type = ProxyType.SOCKS5
+    elif scheme in ('socks', 'socks4'):
+        socks_type = ProxyType.SOCKS4
+    elif scheme == 'socks4a':
+        socks_type = ProxyType.SOCKS4A
+    else:
+        # Without this branch socks_type would be unbound when proxy_args
+        # is built below
+        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)
+
+    def unquote_if_non_empty(s):
+        # Leave None / '' untouched so missing credentials stay missing
+        if not s:
+            return s
+        return compat_urllib_parse_unquote_plus(s)
+
+    proxy_args = (
+        socks_type,
+        url_components.hostname, url_components.port or 1080,
+        True,  # Remote DNS
+        unquote_if_non_empty(url_components.username),
+        unquote_if_non_empty(url_components.password),
+    )
+
+    class SocksConnection(base_class):
+        def connect(self):
+            self.sock = sockssocket()
+            self.sock.setproxy(*proxy_args)
+            # Only plain numeric timeouts are applied to the socket
+            if type(self.timeout) in (int, float):
+                self.sock.settimeout(self.timeout)
+            self.sock.connect((self.host, self.port))
+
+            # Wrap the tunnelled socket in TLS for HTTPS connections
+            if isinstance(self, compat_http_client.HTTPSConnection):
+                if hasattr(self, '_context'):  # Python > 2.6
+                    self.sock = self._context.wrap_socket(
+                        self.sock, server_hostname=self.host)
+                else:
+                    self.sock = ssl.wrap_socket(self.sock)
+
+    return SocksConnection
+
+
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
def __init__(self, params, https_conn_class=None, *args, **kwargs):
compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
def https_open(self, req):
kwargs = {}
+ conn_class = self._https_conn_class
+
if hasattr(self, '_context'): # python > 2.6
kwargs['context'] = self._context
if hasattr(self, '_check_hostname'): # python 3.x
kwargs['check_hostname'] = self._check_hostname
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
return self.do_open(functools.partial(
- _create_http_connection, self, self._https_conn_class, True),
+ _create_http_connection, self, conn_class, True),
req, **kwargs)
+class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ # MozillaCookieJar variant that accepts '#HttpOnly_'-prefixed lines on load
+ # and maps session cookies to/from `expires == 0` in the cookie file.
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ # Store session cookies with `expires` set to 0 instead of an empty
+ # string
+ # Note: this rewrites `expires` on the cookie objects held by the jar,
+ # not on copies
+ for cookie in self:
+ if cookie.expires is None:
+ cookie.expires = 0
+ compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ # Copy the file into memory, stripping the '#HttpOnly_' prefix so those
+ # lines are handed to _really_load() as ordinary cookie lines
+ cf = io.StringIO()
+ with open(filename) as f:
+ for line in f:
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ cf.write(compat_str(line))
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
+ # Session cookies are denoted by either `expires` field set to
+ # an empty string or 0. MozillaCookieJar only recognizes the former
+ # (see [1]). So we need to force the latter to be recognized as session
+ # cookies on our own.
+ # Session cookies may be important for cookies-based authentication,
+ # e.g. usually, when user does not check 'Remember me' check box while
+ # logging in on a site, some important cookies are stored as session
+ # cookies so that not recognizing them will result in failed login.
+ # 1. https://bugs.python.org/issue17164
+ for cookie in self:
+ # Treat `expires=0` cookies as session cookies
+ if cookie.expires == 0:
+ cookie.expires = None
+ cookie.discard = True
+
+
+class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+ # HTTPCookieProcessor subclass that also registers its handlers for HTTPS.
+ def __init__(self, cookiejar=None):
+ compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+
+ def http_response(self, request, response):
+ # The Set-Cookie escaping workaround below is commented out, so this
+ # method currently only delegates to the parent implementation.
+ # Python 2 will choke on next HTTP request in row if there are non-ASCII
+ # characters in Set-Cookie HTTP header of last response (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6769).
+ # In order to at least prevent crashing we will percent encode Set-Cookie
+ # header before HTTPCookieProcessor starts processing it.
+ # if sys.version_info < (3, 0) and response.headers:
+ # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
+ # set_cookie = response.headers.get(set_cookie_header)
+ # if set_cookie:
+ # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
+ # if set_cookie != set_cookie_escaped:
+ # del response.headers[set_cookie_header]
+ # response.headers[set_cookie_header] = set_cookie_escaped
+ return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+
+ # Reuse the HTTP handlers for HTTPS requests and responses
+ https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_response = http_response
+
+
+def extract_timezone(date_str):
+ """Split a trailing UTC designator off `date_str`.
+
+ Returns a tuple (timezone, date_str): `timezone` is a datetime.timedelta
+ (zero when there is no suffix or the suffix is 'Z', otherwise the signed
+ +HH:MM / -HHMM offset) and `date_str` has the matched suffix removed.
+ """
+ # At least 8 characters must precede the suffix for it to be recognised
+ m = re.search(
+ r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ date_str)
+ if not m:
+ timezone = datetime.timedelta()
+ else:
+ date_str = date_str[:-len(m.group('tz'))]
+ # No sign group means the suffix was a bare 'Z' (UTC)
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
+ return timezone, date_str
+
+
def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
if date_str is None:
return None
+ date_str = re.sub(r'\.[0-9]+', '', date_str)
+
if timezone is None:
- m = re.search(
- r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- date_str)
- if not m:
- timezone = datetime.timedelta()
- else:
- date_str = date_str[:-len(m.group(0))]
- if not m.group('sign'):
- timezone = datetime.timedelta()
- else:
- sign = 1 if m.group('sign') == '+' else -1
- timezone = datetime.timedelta(
- hours=sign * int(m.group('hours')),
- minutes=sign * int(m.group('minutes')))
- date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
- dt = datetime.datetime.strptime(date_str, date_format) - timezone
- return calendar.timegm(dt.timetuple())
+ timezone, date_str = extract_timezone(date_str)
+
+ try:
+ date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+ dt = datetime.datetime.strptime(date_str, date_format) - timezone
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
+
+
+def date_formats(day_first=True):
+ """Return DATE_FORMATS_DAY_FIRST when `day_first` is true, otherwise
+ DATE_FORMATS_MONTH_FIRST (the module-level list itself, not a copy)"""
+ return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
upload_date = None
# Replace commas
date_str = date_str.replace(',', ' ')
- # %z (UTC offset) is only supported in python>=3.2
- if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
- date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ _, date_str = extract_timezone(date_str)
- format_expressions = [
- '%d %B %Y',
- '%d %b %Y',
- '%B %d %Y',
- '%b %d %Y',
- '%b %dst %Y %I:%M%p',
- '%b %dnd %Y %I:%M%p',
- '%b %dth %Y %I:%M%p',
- '%Y %m %d',
- '%Y-%m-%d',
- '%Y/%m/%d',
- '%Y/%m/%d %H:%M:%S',
- '%Y-%m-%d %H:%M:%S',
- '%Y-%m-%d %H:%M:%S.%f',
- '%d.%m.%Y %H:%M',
- '%d.%m.%Y %H.%M',
- '%Y-%m-%dT%H:%M:%SZ',
- '%Y-%m-%dT%H:%M:%S.%fZ',
- '%Y-%m-%dT%H:%M:%S.%f0Z',
- '%Y-%m-%dT%H:%M:%S',
- '%Y-%m-%dT%H:%M:%S.%f',
- '%Y-%m-%dT%H:%M',
- ]
- if day_first:
- format_expressions.extend([
- '%d-%m-%Y',
- '%d.%m.%Y',
- '%d/%m/%Y',
- '%d/%m/%y',
- '%d/%m/%Y %H:%M:%S',
- ])
- else:
- format_expressions.extend([
- '%m-%d-%Y',
- '%m.%d.%Y',
- '%m/%d/%Y',
- '%m/%d/%y',
- '%m/%d/%Y %H:%M:%S',
- ])
- for expression in format_expressions:
+ for expression in date_formats(day_first):
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except ValueError:
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
- upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
- return upload_date
+ try:
+ upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is not None:
+ return compat_str(upload_date)
+
+
+def unified_timestamp(date_str, day_first=True):
+ """Parse a free-form date/time string into a UNIX timestamp.
+
+ Tries every pattern from date_formats(day_first) and then
+ email.utils.parsedate_tz(). Returns None when `date_str` is None or
+ nothing could be parsed.
+ """
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'[,|]', '', date_str)
+
+ # NOTE(review): 12 hours are added whenever "PM" occurs anywhere in the
+ # string (case-insensitively), which includes times such as 12:30 PM -
+ # confirm this is intended
+ pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
+ timezone, date_str = extract_timezone(date_str)
+
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
+ # Remove unrecognized timezones from ISO 8601 alike timestamps
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ if m:
+ date_str = date_str[:-len(m.group('tz'))]
+
+ # Python only supports microseconds, so remove nanoseconds
+ m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+ if m:
+ date_str = m.group(1)
+
+ # The first pattern that parses wins
+ for expression in date_formats(day_first):
+ try:
+ dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
+ # Last resort: RFC 2822 style dates; the extracted timezone is not applied
+ # on this path
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
- if url is None:
+ if url is None or '.' not in url:
return default_ext
guess = url.partition('?')[0].rpartition('.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return guess
+ # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
+ elif guess.rstrip('/') in KNOWN_EXTENSIONS:
+ return guess.rstrip('/')
else:
return default_ext
-def subtitles_filename(filename, sub_lang, sub_format):
- return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
+def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
+ return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
def date_from_str(date_str):
return today
if date_str == 'yesterday':
return today - datetime.timedelta(days=1)
- match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+ match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
if match is not None:
sign = match.group('sign')
time = int(match.group('time'))
if sign == '-':
time = -time
unit = match.group('unit')
- # A bad aproximation?
+ # A bad approximation?
if unit == 'month':
unit = 'day'
time *= 30
unit += 's'
delta = datetime.timedelta(**{unit: time})
return today + delta
- return datetime.datetime.strptime(date_str, "%Y%m%d").date()
+ return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
if fileno not in WIN_OUTPUT_IDS:
return False
- GetStdHandle = ctypes.WINFUNCTYPE(
+ GetStdHandle = compat_ctypes_WINFUNCTYPE(
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
- (b"GetStdHandle", ctypes.windll.kernel32))
+ ('GetStdHandle', ctypes.windll.kernel32))
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
- WriteConsoleW = ctypes.WINFUNCTYPE(
+ WriteConsoleW = compat_ctypes_WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
- ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
+ ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
written = ctypes.wintypes.DWORD(0)
- GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
+ GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
- GetConsoleMode = ctypes.WINFUNCTYPE(
+ GetConsoleMode = compat_ctypes_WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
ctypes.POINTER(ctypes.wintypes.DWORD))(
- (b"GetConsoleMode", ctypes.windll.kernel32))
+ ('GetConsoleMode', ctypes.windll.kernel32))
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
- GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+ or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
if _windows_write_string(s, out):
return
- if ('b' in getattr(out, 'mode', '') or
- sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ if ('b' in getattr(out, 'mode', '')
+ or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
byt = s.encode(encoding or preferredencoding(), 'ignore')
out.write(byt)
elif hasattr(out, 'buffer'):
def intlist_to_bytes(xs):
if not xs:
return b''
- return struct_pack('%dB' % len(xs), *xs)
+ return compat_struct_pack('%dB' % len(xs), *xs)
# Cross-platform file locking
raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
else:
- import fcntl
+ # Some platforms, such as Jython, are missing fcntl
+ try:
+ import fcntl
- def _lock_file(f, exclusive):
- fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+ def _lock_file(f, exclusive):
+ fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
- def _unlock_file(f):
- fcntl.flock(f, fcntl.LOCK_UN)
+ def _unlock_file(f):
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except ImportError:
+ UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+ def _lock_file(f, exclusive):
+ raise IOError(UNSUPPORTED_MSG)
+
+ def _unlock_file(f):
+ raise IOError(UNSUPPORTED_MSG)
class locked_file(object):
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding)
- quoted_args.append(pipes.quote(a))
+ quoted_args.append(compat_shlex_quote(a))
return ' '.join(quoted_args)
def smuggle_url(url, data):
""" Pass additional data in a URL for internal use. """
- sdata = compat_urllib_parse.urlencode(
+ url, idata = unsmuggle_url(url, {})
+ data.update(idata)
+ sdata = compat_urllib_parse_urlencode(
{'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata
return '%.2f%s' % (converted, suffix)
+def lookup_unit_table(unit_table, s):
+ units_re = '|'.join(re.escape(u) for u in unit_table)
+ m = re.match(
+ r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
+ if not m:
+ return None
+ num_str = m.group('num').replace(',', '.')
+ mult = unit_table[m.group('unit')]
+ return int(float(num_str) * mult)
+
+
def parse_filesize(s):
if s is None:
return None
- # The lower-case forms are of course incorrect and inofficial,
+ # The lower-case forms are of course incorrect and unofficial,
# but we support those too
_UNIT_TABLE = {
'B': 1,
'b': 1,
+ 'bytes': 1,
'KiB': 1024,
'KB': 1000,
'kB': 1024,
'Kb': 1000,
+ 'kb': 1000,
+ 'kilobytes': 1000,
+ 'kibibytes': 1024,
'MiB': 1024 ** 2,
'MB': 1000 ** 2,
'mB': 1024 ** 2,
'Mb': 1000 ** 2,
+ 'mb': 1000 ** 2,
+ 'megabytes': 1000 ** 2,
+ 'mebibytes': 1024 ** 2,
'GiB': 1024 ** 3,
'GB': 1000 ** 3,
'gB': 1024 ** 3,
'Gb': 1000 ** 3,
+ 'gb': 1000 ** 3,
+ 'gigabytes': 1000 ** 3,
+ 'gibibytes': 1024 ** 3,
'TiB': 1024 ** 4,
'TB': 1000 ** 4,
'tB': 1024 ** 4,
'Tb': 1000 ** 4,
+ 'tb': 1000 ** 4,
+ 'terabytes': 1000 ** 4,
+ 'tebibytes': 1024 ** 4,
'PiB': 1024 ** 5,
'PB': 1000 ** 5,
'pB': 1024 ** 5,
'Pb': 1000 ** 5,
+ 'pb': 1000 ** 5,
+ 'petabytes': 1000 ** 5,
+ 'pebibytes': 1024 ** 5,
'EiB': 1024 ** 6,
'EB': 1000 ** 6,
'eB': 1024 ** 6,
'Eb': 1000 ** 6,
+ 'eb': 1000 ** 6,
+ 'exabytes': 1000 ** 6,
+ 'exbibytes': 1024 ** 6,
'ZiB': 1024 ** 7,
'ZB': 1000 ** 7,
'zB': 1024 ** 7,
'Zb': 1000 ** 7,
+ 'zb': 1000 ** 7,
+ 'zettabytes': 1000 ** 7,
+ 'zebibytes': 1024 ** 7,
'YiB': 1024 ** 8,
'YB': 1000 ** 8,
'yB': 1024 ** 8,
'Yb': 1000 ** 8,
+ 'yb': 1000 ** 8,
+ 'yottabytes': 1000 ** 8,
+ 'yobibytes': 1024 ** 8,
}
- units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
- m = re.match(
- r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
- if not m:
+ return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_count(s):
+ if s is None:
return None
- num_str = m.group('num').replace(',', '.')
- mult = _UNIT_TABLE[m.group('unit')]
- return int(float(num_str) * mult)
+ s = s.strip()
+
+ if re.match(r'^[\d,.]+$', s):
+ return str_to_int(s)
+
+ _UNIT_TABLE = {
+ 'k': 1000,
+ 'K': 1000,
+ 'm': 1000 ** 2,
+ 'M': 1000 ** 2,
+ 'kk': 1000 ** 2,
+ 'KK': 1000 ** 2,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
-def month_by_name(name):
+def parse_resolution(s):
+ if s is None:
+ return {}
+
+ mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+ if mobj:
+ return {
+ 'width': int(mobj.group('w')),
+ 'height': int(mobj.group('h')),
+ }
+
+ mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1))}
+
+ mobj = re.search(r'\b([48])[kK]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1)) * 540}
+
+ return {}
+
+
+def parse_bitrate(s):
+ if not isinstance(s, compat_str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
+def month_by_name(name, lang='en'):
""" Return the number of a month by (locale-independently) English name """
+ month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
try:
- return ENGLISH_MONTH_NAMES.index(name) + 1
+ return month_names.index(name) + 1
except ValueError:
return None
def setproctitle(title):
assert isinstance(title, compat_str)
+
+ # ctypes in Jython is not complete
+ # http://bugs.jython.org/issue2148
+ if sys.platform.startswith('java'):
+ return
+
try:
- libc = ctypes.cdll.LoadLibrary("libc.so.6")
+ libc = ctypes.cdll.LoadLibrary('libc.so.6')
except OSError:
return
+ except TypeError:
+ # LoadLibrary in Windows Python 2.7.13 only expects
+ # a bytestring, but since unicode_literals turns
+ # every string into a unicode string, it fails.
+ return
title_bytes = title.encode('utf-8')
buf = ctypes.create_string_buffer(len(title_bytes))
buf.value = title_bytes
def remove_start(s, start):
- if s.startswith(start):
- return s[len(start):]
- return s
+ return s[len(start):] if s is not None and s.startswith(start) else s
def remove_end(s, end):
- if s.endswith(end):
- return s[:-len(end)]
+ return s[:-len(end)] if s is not None and s.endswith(end) else s
+
+
+def remove_quotes(s):
+ if s is None or len(s) < 2:
+ return s
+ for quote in ('"', "'", ):
+ if s[0] == quote and s[-1] == quote:
+ return s[1:-1]
return s
return path.strip('/').split('/')[-1]
+def base_url(url):
+ return re.match(r'https?://[^?#&]+/', url).group()
+
+
+def urljoin(base, path):
+ if isinstance(path, bytes):
+ path = path.decode('utf-8')
+ if not isinstance(path, compat_str) or not path:
+ return None
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
+ return path
+ if isinstance(base, bytes):
+ base = base.decode('utf-8')
+ if not isinstance(base, compat_str) or not re.match(
+ r'^(?:https?:)?//', base):
+ return None
+ return compat_urlparse.urljoin(base, path)
+
+
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
- return "HEAD"
+ return 'HEAD'
+
+
+class PUTRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
v = getattr(v, get_attr, None)
if v == '':
v = None
- return default if v is None else (int(v) * invscale // scale)
+ if v is None:
+ return default
+ try:
+ return int(v) * invscale // scale
+ except (ValueError, TypeError):
+ return default
def str_or_none(v, default=None):
def str_to_int(int_str):
""" A more relaxed version of int_or_none """
- if int_str is None:
- return None
- int_str = re.sub(r'[,\.\+]', '', int_str)
- return int(int_str)
+ if isinstance(int_str, compat_integer_types):
+ return int_str
+ elif isinstance(int_str, compat_str):
+ int_str = re.sub(r'[,\.\+]', '', int_str)
+ return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
- return default if v is None else (float(v) * invscale / scale)
+ if v is None:
+ return default
+ try:
+ return float(v) * invscale / scale
+ except (ValueError, TypeError):
+ return default
+
+
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
+
+
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, compat_str) else default
+
+
+def url_or_none(url):
+ if not url or not isinstance(url, compat_str):
+ return None
+ url = url.strip()
+ return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
def parse_duration(s):
s = s.strip()
- m = re.match(
- r'''(?ix)(?:P?T)?
- (?:
- (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
- (?P<only_hours>[0-9.]+)\s*(?:hours?)|
-
- \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
- (?:
+ days, hours, mins, secs, ms = [None] * 5
+ m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(
+ r'''(?ix)(?:P?
(?:
- (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
- (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+ [0-9]+\s*y(?:ears?)?\s*
)?
- (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
- )?
- (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
- )$''', s)
- if not m:
- return None
- res = 0
- if m.group('only_mins'):
- return float_or_none(m.group('only_mins'), invscale=60)
- if m.group('only_hours'):
- return float_or_none(m.group('only_hours'), invscale=60 * 60)
- if m.group('secs'):
- res += int(m.group('secs'))
- if m.group('mins_reversed'):
- res += int(m.group('mins_reversed')) * 60
- if m.group('mins'):
- res += int(m.group('mins')) * 60
- if m.group('hours'):
- res += int(m.group('hours')) * 60 * 60
- if m.group('hours_reversed'):
- res += int(m.group('hours_reversed')) * 60 * 60
- if m.group('days'):
- res += int(m.group('days')) * 24 * 60 * 60
- if m.group('ms'):
- res += float(m.group('ms'))
- return res
+ (?:
+ [0-9]+\s*m(?:onths?)?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?\s*
+ )?
+ (?:
+ (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+ )?
+ T)?
+ (?:
+ (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+ )?
+ (?:
+ (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+ )?
+ (?:
+ (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+ )?Z?$''', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
+ if m:
+ hours, mins = m.groups()
+ else:
+ return None
+
+ duration = 0
+ if secs:
+ duration += float(secs)
+ if mins:
+ duration += float(mins) * 60
+ if hours:
+ duration += float(hours) * 60 * 60
+ if days:
+ duration += float(days) * 24 * 60 * 60
+ if ms:
+ duration += float(ms)
+ return duration
def prepend_extension(filename, ext, expected_real_ext=None):
""" Returns the version of the specified executable,
or False if the executable is not present """
try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if youtube-dl is run in the background.
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
[encodeArgument(exe)] + args,
+ stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
except OSError:
return False
class OnDemandPagedList(PagedList):
- def __init__(self, pagefunc, pagesize):
+ def __init__(self, pagefunc, pagesize, use_cache=True):
self._pagefunc = pagefunc
self._pagesize = pagesize
+ self._use_cache = use_cache
+ if use_cache:
+ self._cache = {}
def getslice(self, start=0, end=None):
res = []
if start >= nextfirstid:
continue
- page_results = list(self._pagefunc(pagenum))
+ page_results = None
+ if self._use_cache:
+ page_results = self._cache.get(pagenum)
+ if page_results is None:
+ page_results = list(self._pagefunc(pagenum))
+ if self._use_cache:
+ self._cache[pagenum] = page_results
startv = (
start % self._pagesize
"""Escape URL as suggested by RFC 3986"""
url_parsed = compat_urllib_parse_urlparse(url)
return url_parsed._replace(
+ netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
params=escape_rfc3986(url_parsed.params),
query=escape_rfc3986(url_parsed.query),
fragment=escape_rfc3986(url_parsed.fragment)
).geturl()
-try:
- struct.pack('!I', 0)
-except TypeError:
- # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
- def struct_pack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.pack(spec, *args)
-
- def struct_unpack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.unpack(spec, *args)
-else:
- struct_pack = struct.pack
- struct_unpack = struct.unpack
-
def read_batch_urls(batch_fd):
def fixup(url):
def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+ return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
-try:
- etree_iter = xml.etree.ElementTree.Element.iter
-except AttributeError: # Python <=2.6
- etree_iter = lambda n: n.findall('.//*')
+def update_url_query(url, query):
+ if not query:
+ return url
+ parsed_url = compat_urlparse.urlparse(url)
+ qs = compat_parse_qs(parsed_url.query)
+ qs.update(query)
+ return compat_urlparse.urlunparse(parsed_url._replace(
+ query=compat_urllib_parse_urlencode(qs, True)))
+
+
+def update_Request(req, url=None, data=None, headers={}, query={}):
+ req_headers = req.headers.copy()
+ req_headers.update(headers)
+ req_data = data or req.data
+ req_url = update_url_query(url or req.get_full_url(), query)
+ req_get_method = req.get_method()
+ if req_get_method == 'HEAD':
+ req_type = HEADRequest
+ elif req_get_method == 'PUT':
+ req_type = PUTRequest
+ else:
+ req_type = compat_urllib_request.Request
+ new_req = req_type(
+ req_url, data=req_data, headers=req_headers,
+ origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+ if hasattr(req, 'timeout'):
+ new_req.timeout = req.timeout
+ return new_req
+
+
+def _multipart_encode_impl(data, boundary):
+ content_type = 'multipart/form-data; boundary=%s' % boundary
+
+ out = b''
+ for k, v in data.items():
+ out += b'--' + boundary.encode('ascii') + b'\r\n'
+ if isinstance(k, compat_str):
+ k = k.encode('utf-8')
+ if isinstance(v, compat_str):
+ v = v.encode('utf-8')
+ # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
+ # suggests sending UTF-8 directly. Firefox sends UTF-8, too
+ content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
+ if boundary.encode('ascii') in content:
+ raise ValueError('Boundary overlaps with data')
+ out += content
+
+ out += b'--' + boundary.encode('ascii') + b'--\r\n'
+
+ return out, content_type
+
+
+def multipart_encode(data, boundary=None):
+ '''
+ Encode a dict to RFC 7578-compliant form-data
+
+ data:
+ A dict where keys and values can be either Unicode or bytes-like
+ objects.
+ boundary:
+ If a Unicode object is specified, it is used as the boundary.
+ Otherwise a random boundary is generated.
+
+ Reference: https://tools.ietf.org/html/rfc7578
+ '''
+ has_specified_boundary = boundary is not None
+
+ while True:
+ if boundary is None:
+ boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
+ try:
+ out, content_type = _multipart_encode_impl(data, boundary)
+ break
+ except ValueError:
+ if has_specified_boundary:
+ raise
+ boundary = None
-def parse_xml(s):
- class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
- def doctype(self, name, pubid, system):
- pass # Ignore doctypes
+ return out, content_type
+
+
+def dict_get(d, key_or_keys, default=None, skip_false_values=True):
+ if isinstance(key_or_keys, (list, tuple)):
+ for key in key_or_keys:
+ if key not in d or d[key] is None or skip_false_values and not d[key]:
+ continue
+ return d[key]
+ return default
+ return d.get(key_or_keys, default)
+
+
+def try_get(src, getter, expected_type=None):
+ if not isinstance(getter, (list, tuple)):
+ getter = [getter]
+ for get in getter:
+ try:
+ v = get(src)
+ except (AttributeError, KeyError, TypeError, IndexError):
+ pass
+ else:
+ if expected_type is None or isinstance(v, expected_type):
+ return v
+
+
+def merge_dicts(*dicts):
+ merged = {}
+ for a_dict in dicts:
+ for k, v in a_dict.items():
+ if v is None:
+ continue
+ if (k not in merged
+ or (isinstance(v, compat_str) and v
+ and isinstance(merged[k], compat_str)
+ and not merged[k])):
+ merged[k] = v
+ return merged
- parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
- kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
- tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
- # Fix up XML parser in Python 2.x
- if sys.version_info < (3, 0):
- for n in etree_iter(tree):
- if n.text is not None:
- if not isinstance(n.text, compat_str):
- n.text = n.text.decode('utf-8')
- return tree
+
+def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
+ return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
US_RATINGS = {
}
+TV_PARENTAL_GUIDELINES = {
+ 'TV-Y': 0,
+ 'TV-Y7': 7,
+ 'TV-G': 0,
+ 'TV-PG': 0,
+ 'TV-14': 14,
+ 'TV-MA': 17,
+}
+
+
def parse_age_limit(s):
- if s is None:
+ if type(s) == int:
+ return s if 0 <= s <= 21 else None
+ if not isinstance(s, compat_basestring):
return None
m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
- return int(m.group('age')) if m else US_RATINGS.get(s, None)
+ if m:
+ return int(m.group('age'))
+ if s in US_RATINGS:
+ return US_RATINGS[s]
+ m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+ if m:
+ return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+ return None
def strip_jsonp(code):
return re.sub(
- r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
def js_to_json(code):
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+ SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+ INTEGER_TABLE = (
+ (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+ (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+ )
+
def fix_kv(m):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
- if v.startswith('"'):
- return v
- if v.startswith("'"):
- v = v[1:-1]
- v = re.sub(r"\\\\|\\'|\"", lambda m: {
- '\\\\': '\\\\',
- "\\'": "'",
+ elif v.startswith('/*') or v.startswith('//') or v == ',':
+ return ""
+
+ if v[0] in ("'", '"'):
+ v = re.sub(r'(?s)\\.|"', lambda m: {
'"': '\\"',
- }[m.group(0)], v)
+ "\\'": "'",
+ '\\\n': '',
+ '\\x': '\\u00',
+ }.get(m.group(0), m.group(0)), v[1:-1])
+
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return '"%d":' % i if v.endswith(':') else '%d' % i
+
return '"%s"' % v
- res = re.sub(r'''(?x)
- "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
- '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
- [a-zA-Z_][.a-zA-Z_0-9]*
- ''', fix_kv, code)
- res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
- return res
+ return re.sub(r'''(?sx)
+ "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+ {comment}|,(?={skip}[\]}}])|
+ (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+ [0-9]+(?={skip}:)
+ '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
def qualities(quality_ids):
def args_to_str(args):
# Get a short string representation for a subprocess command
- return ' '.join(shlex_quote(a) for a in args)
+ return ' '.join(compat_shlex_quote(a) for a in args)
+
+
+def error_to_compat_str(err):
+ err_str = str(err)
+ # On Python 2, the error byte string must be decoded with the
+ # proper encoding rather than ASCII
+ if sys.version_info[0] < 3:
+ err_str = err_str.decode(preferredencoding())
+ return err_str
def mimetype2ext(mt):
+ if mt is None:
+ return None
+
+ ext = {
+ 'audio/mp4': 'm4a',
+ # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Use .mp3 here
+ # as it's the most popular one
+ 'audio/mpeg': 'mp3',
+ }.get(mt)
+ if ext is not None:
+ return ext
+
_, _, res = mt.rpartition('/')
+ res = res.split(';')[0].strip().lower()
return {
- 'x-ms-wmv': 'wmv',
- 'x-mp4-fragmented': 'mp4',
+ '3gpp': '3gp',
+ 'smptett+xml': 'tt',
+ 'ttaf+xml': 'dfxp',
'ttml+xml': 'ttml',
+ 'x-flv': 'flv',
+ 'x-mp4-fragmented': 'mp4',
+ 'x-ms-sami': 'sami',
+ 'x-ms-wmv': 'wmv',
+ 'mpegurl': 'm3u8',
+ 'x-mpegurl': 'm3u8',
+ 'vnd.apple.mpegurl': 'm3u8',
+ 'dash+xml': 'mpd',
+ 'f4m+xml': 'f4m',
+ 'hds+xml': 'f4m',
+ 'vnd.ms-sstr+xml': 'ism',
+ 'quicktime': 'mov',
+ 'mp2t': 'ts',
}.get(res, res)
+def parse_codecs(codecs_str):
+ # http://tools.ietf.org/html/rfc6381
+ if not codecs_str:
+ return {}
+ splited_codecs = list(filter(None, map(
+ lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
+ vcodec, acodec = None, None
+ for full_codec in splited_codecs:
+ codec = full_codec.split('.')[0]
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
+ if not vcodec:
+ vcodec = full_codec
+ elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ if not acodec:
+ acodec = full_codec
+ else:
+ write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
+ if not vcodec and not acodec:
+ if len(splited_codecs) == 2:
+ return {
+ 'vcodec': splited_codecs[0],
+ 'acodec': splited_codecs[1],
+ }
+ else:
+ return {
+ 'vcodec': vcodec or 'none',
+ 'acodec': acodec or 'none',
+ }
+ return {}
+
+
def urlhandle_detect_ext(url_handle):
- try:
- url_handle.headers
- getheader = lambda h: url_handle.headers[h]
- except AttributeError: # Python < 3
- getheader = url_handle.info().getheader
+ getheader = url_handle.headers.get
cd = getheader('Content-Disposition')
if cd:
return mimetype2ext(getheader('Content-Type'))
+def encode_data_uri(data, mime_type):
+ return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
def age_restricted(content_limit, age_limit):
""" Returns True iff the content should be blocked """
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
)
\s*$
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
- if m.group('strval') is not None:
+ actual_value = dct.get(m.group('key'))
+ if (m.group('quotedstrval') is not None
+ or m.group('strval') is not None
+ # If the original field is a string and the matching comparison value
+ # is a number, we should respect the type of the original field
+ # and process the comparison value as a string (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11082).
+ or actual_value is not None and m.group('intval') is not None
+ and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
- comparison_value = m.group('strval')
+ comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
+ quote = m.group('quote')
+ if quote is not None:
+ comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else:
try:
comparison_value = int(m.group('intval'))
raise ValueError(
'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part))
- actual_value = dct.get(m.group('key'))
if actual_value is None:
return m.group('none_inclusive')
return op(actual_value, comparison_value)
UNARY_OPERATORS = {
- '': lambda v: v is not None,
- '!': lambda v: v is None,
+ '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+ '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
}
operator_rex = re.compile(r'''(?x)\s*
(?P<op>%s)\s*(?P<key>[a-z_]+)
def parse_dfxp_time_expr(time_expr):
if not time_expr:
- return 0.0
+ return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj:
return float(mobj.group('time_offset'))
- mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
if mobj:
- return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
+ LEGACY_NAMESPACES = (
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
+ ]),
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
+ ]),
+ )
+
+ SUPPORTED_STYLING = [
+ 'color',
+ 'fontFamily',
+ 'fontSize',
+ 'fontStyle',
+ 'fontWeight',
+ 'textDecoration'
+ ]
+
_x = functools.partial(xpath_with_ns, ns_map={
+ 'xml': 'http://www.w3.org/XML/1998/namespace',
'ttml': 'http://www.w3.org/ns/ttml',
- 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
+ 'tts': 'http://www.w3.org/ns/ttml#styling',
})
- def parse_node(node):
- str_or_empty = functools.partial(str_or_none, default='')
+ styles = {}
+ default_style = {}
- out = str_or_empty(node.text)
+ class TTMLPElementParser(object):
+ _out = ''
+ _unclosed_elements = []
+ _applied_styles = []
- for child in node:
- if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
- out += '\n' + str_or_empty(child.tail)
- elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
- out += str_or_empty(parse_node(child))
+ def start(self, tag, attrib):
+ if tag in (_x('ttml:br'), 'br'):
+ self._out += '\n'
else:
- out += str_or_empty(xml.etree.ElementTree.tostring(child))
+ unclosed_elements = []
+ style = {}
+ element_style_id = attrib.get('style')
+ if default_style:
+ style.update(default_style)
+ if element_style_id:
+ style.update(styles.get(element_style_id, {}))
+ for prop in SUPPORTED_STYLING:
+ prop_val = attrib.get(_x('tts:' + prop))
+ if prop_val:
+ style[prop] = prop_val
+ if style:
+ font = ''
+ for k, v in sorted(style.items()):
+ if self._applied_styles and self._applied_styles[-1].get(k) == v:
+ continue
+ if k == 'color':
+ font += ' color="%s"' % v
+ elif k == 'fontSize':
+ font += ' size="%s"' % v
+ elif k == 'fontFamily':
+ font += ' face="%s"' % v
+ elif k == 'fontWeight' and v == 'bold':
+ self._out += '<b>'
+ unclosed_elements.append('b')
+ elif k == 'fontStyle' and v == 'italic':
+ self._out += '<i>'
+ unclosed_elements.append('i')
+ elif k == 'textDecoration' and v == 'underline':
+ self._out += '<u>'
+ unclosed_elements.append('u')
+ if font:
+ self._out += '<font' + font + '>'
+ unclosed_elements.append('font')
+ applied_style = {}
+ if self._applied_styles:
+ applied_style.update(self._applied_styles[-1])
+ applied_style.update(style)
+ self._applied_styles.append(applied_style)
+ self._unclosed_elements.append(unclosed_elements)
+
+ def end(self, tag):
+ if tag not in (_x('ttml:br'), 'br'):
+ unclosed_elements = self._unclosed_elements.pop()
+ for element in reversed(unclosed_elements):
+ self._out += '</%s>' % element
+ if unclosed_elements and self._applied_styles:
+ self._applied_styles.pop()
+
+ def data(self, data):
+ self._out += data
+
+ def close(self):
+ return self._out.strip()
+
+ def parse_node(node):
+ target = TTMLPElementParser()
+ parser = xml.etree.ElementTree.XMLParser(target=target)
+ parser.feed(xml.etree.ElementTree.tostring(node))
+ return parser.close()
- return out
+ for k, v in LEGACY_NAMESPACES:
+ for ns in v:
+ dfxp_data = dfxp_data.replace(ns, k)
- dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
+ dfxp = compat_etree_fromstring(dfxp_data)
out = []
- paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
if not paras:
raise ValueError('Invalid dfxp/TTML subtitle')
+ repeat = False
+ while True:
+ for style in dfxp.findall(_x('.//ttml:style')):
+ style_id = style.get('id') or style.get(_x('xml:id'))
+ if not style_id:
+ continue
+ parent_style_id = style.get('style')
+ if parent_style_id:
+ if parent_style_id not in styles:
+ repeat = True
+ continue
+ styles[style_id] = styles[parent_style_id].copy()
+ for prop in SUPPORTED_STYLING:
+ prop_val = style.get(_x('tts:' + prop))
+ if prop_val:
+ styles.setdefault(style_id, {})[prop] = prop_val
+ if repeat:
+ repeat = False
+ else:
+ break
+
+ for p in ('body', 'div'):
+ ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
+ if ele is None:
+ continue
+ style = styles.get(ele.get('style'))
+ if not style:
+ continue
+ default_style.update(style)
+
for para, index in zip(paras, itertools.count(1)):
- begin_time = parse_dfxp_time_expr(para.attrib['begin'])
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
if not end_time:
- end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
+ if not dur:
+ continue
+ end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % (
index,
srt_subtitles_timecode(begin_time),
return ''.join(out)
+def cli_option(params, command_option, param):
+ param = params.get(param)
+ if param:
+ param = compat_str(param)
+ return [command_option, param] if param is not None else []
+
+
+def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
+ param = params.get(param)
+ if param is None:
+ return []
+ assert isinstance(param, bool)
+ if separator:
+ return [command_option + separator + (true_value if param else false_value)]
+ return [command_option, true_value if param else false_value]
+
+
+def cli_valueless_option(params, command_option, param, expected_value=True):
+ param = params.get(param)
+ return [command_option] if param == expected_value else []
+
+
+def cli_configuration_args(params, param, default=[]):
+ ex_args = params.get(param)
+ if ex_args is None:
+ return default
+ assert isinstance(ex_args, list)
+ return ex_args
+
+
class ISO639Utils(object):
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
_lang_map = {
'gv': 'glv',
'ha': 'hau',
'he': 'heb',
+ 'iw': 'heb', # Replaced by he in 1989 revision
'hi': 'hin',
'ho': 'hmo',
'hr': 'hrv',
'hz': 'her',
'ia': 'ina',
'id': 'ind',
+ 'in': 'ind', # Replaced by id in 1989 revision
'ie': 'ile',
'ig': 'ibo',
'ii': 'iii',
'wo': 'wol',
'xh': 'xho',
'yi': 'yid',
+ 'ji': 'yid', # Replaced by yi in 1989 revision
'yo': 'yor',
'za': 'zha',
'zh': 'zho',
return cls._country_map.get(code.upper())
class GeoUtils(object):
    """Helpers for producing IPv4 addresses that belong to a given country."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Pick a random IPv4 address from a country's block or a CIDR block.

        A two-character argument is treated as a country code and looked up
        (case-insensitively) in the table above; None is returned when the
        code is unknown. Anything else is taken to be an `a.b.c.d/len` block.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_len = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting every host bit gives the last address of the block
        host_mask = 0xffffffff >> int(prefix_len)
        chosen = random.randint(lowest, lowest | host_mask)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
+
+
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
- return compat_urllib_request.ProxyHandler.__init__(self, proxies)
+ compat_urllib_request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')
if proxy == '__noproxy__':
return None # No Proxy
+ if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ req.add_header('Ytdl-socks-proxy', proxy)
+ # youtube-dl's http/https handlers do wrapping the socket with socks
+ return None
return compat_urllib_request.ProxyHandler.proxy_open(
self, req, proxy, type)
+
+
+# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+# released into Public Domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Zero (and any non-positive value) is encoded as a single zero byte.
    If optional blocksize is given and greater than zero, the result is
    left-padded with zero bytes up to a multiple of blocksize.
    """
    n = int(n)
    # Collect 32-bit words, least significant first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Drop the zero bytes the most significant word may start with; an empty
    # result (nothing was packed) becomes a single zero byte
    s = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    remainder = len(s) % blocksize if blocksize > 0 else 0
    if remainder:
        s = b'\000' * (blocksize - remainder) + s
    return s
+
+
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Interpret a big-endian byte string as an unsigned integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the input splits evenly into 32-bit words
    pad = -len(s) % 4
    if pad:
        s = b'\000' * pad + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
+
+
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt a single block with OHDave's RSA scheme (http://www.ohdave.com/rsa/)

    Input:
        data: bytes-like object to encrypt; it is read as a little-endian
              integer (i.e. byte-reversed before conversion)
        exponent, modulus: RSA parameters e and N, both integers
    Output: lowercase hex string of the encrypted integer

    Limitation: supports one block encryption only
    '''
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    cipher_number = pow(plain_number, exponent, modulus)
    return '%x' % cipher_number
+
+
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme (v1.5, block type 2)

    The result has the layout [0, 2, <padding>, 0, <data>]. Every padding
    octet must be non-zero: the first zero after the header marks where the
    data starts, so a zero inside the padding would make the receiver
    truncate the padding and mis-parse the message.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves less than 11 bytes of overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # 1..255 inclusive: zero is reserved for the separator
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
+
+
def encode_base_n(num, n, table=None):
    """
    Convert a non-negative integer to its base-n string representation

    @param {int} num     number to encode
    @param {int} n       base
    @param {str} table   digit alphabet, table[i] being the digit for value
                         i; defaults to the first n characters of
                         0-9a-zA-Z (so n may be at most 62 without a table)
    @returns {str}       encoded number; zero is encoded as table[0]
    @raises ValueError   if the table has fewer than n digits, if num is
                         negative, or if a non-zero num is requested in a
                         base below 2 (the digit loop could never terminate
                         for these inputs)
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never brings a negative number (or any number in
    # base 1) down to zero, so reject these instead of spinning forever
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small to encode %d' % (n, num))

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
+
+
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into its source text.

    The match supplies the packed body, a numeric base, a symbol count and a
    '|'-separated symbol list. Each word in the body is the base-n encoding
    of an index into that list; an empty symbol means the word stands for
    itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    packed_source, base, count, symbols = match.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for position in range(count - 1, -1, -1):
        token = encode_base_n(position, base)
        symbol_table[token] = symbols[position] or token

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, packed_source)
+
+
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by `shift` places.

    The shift wraps around the alphabet; characters not in the alphabet are
    left untouched. A zero shift returns s itself.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(ch):
        if ch not in alphabet:
            return ch
        return alphabet[(alphabet.index(ch) + shift) % size]

    return ''.join(map(_rotate, s))
+
+
def rot47(s):
    """Apply a caesar() shift of 47 over the 94-character alphabet '!'..'~'."""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
+
+
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or double-quoted (quoted values may contain commas);
    surrounding quotes are stripped. A repeated key keeps its last value.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict(
        (key, val[1:-1] if val.startswith('"') else val)
        for key, val in pairs)
+
+
def urshift(val, n):
    """Unsigned right shift: negative values are first offset by 2**32."""
    if val < 0:
        val += 0x100000000
    return val >> n
+
+
+# Based on png2str() written by @gdkchan and improved by @yokrysty
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode an in-memory PNG into its unfiltered scanline bytes.

    Only the file signature, the type of the first chunk (IHDR) and the
    width/height fields are inspected. Every scanline is read as
    `width * 3` bytes after its filter byte.
    NOTE(review): this presumably expects 8-bit, non-interlaced RGB images;
    bit depth, colour type and interlace method are not checked here.

    @param png_data  bytes of a whole PNG file
    @returns         (width, height, pixels) where pixels is a list of
                     `height` rows, each a flat list of `width * 3` ints
    @raises IOError  if the signature/IHDR check fails or no IDAT data exists
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature is the chunk stream
    header = png_data[8:]

    # The first chunk (4-byte length, then 4-byte type) must be IHDR
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned formats keyed by field width in bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Consume chunks one by one: length, type, data, CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # chunks[0] is the IHDR chunk verified above; it starts with width, height
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # The compressed image stream is the concatenation of all IDAT chunks
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per scanline, not counting the leading filter-type byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # idx is a flat offset into the already reconstructed bytes
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is one filter-type byte followed by `stride` bytes
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so _get_pixel can read the bytes
        # already reconstructed on the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours are taken 3 bytes back (left) and one row up;
            # missing neighbours count as 0
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Any other filter type (including 0) leaves the byte unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of a, b, c is closest to p (ties: a, then b)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
+
+
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` on the file at `path`.

    Tries, in order: the Python `xattr` module (either pyxattr or xattr),
    NTFS Alternate Data Streams when compat_os_name is 'nt', and finally
    the `setfattr` or `xattr` command-line tools.

    `value` is written in binary mode / decoded as UTF-8 below, so it is
    expected to be a byte string.

    Raises XAttrUnavailableError when no usable implementation is found (or
    pyxattr is too old) and XAttrMetadataError when setting the attribute
    fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both bindings are imported as `xattr`; only pyxattr exposes `set`
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the message text, nothing below
                # falls back to another implementation; the error leaves
                # this function unless XAttrUnavailableError derives from
                # ImportError (not visible here).
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr binding is importable
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # An ADS is addressed as "<file path>:<stream name>"
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command-line argument, so it
                # has to be text
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported with the tool's stderr
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
+
+
def random_birthday(year_field, month_field, day_field):
    """Return a random date between 1950-01-01 and 1995-12-31 as form fields.

    The result maps the three given field names to the year, month and day
    of the chosen date, each rendered with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    picked = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    parts = (picked.year, picked.month, picked.day)
    return dict((name, str(part)) for name, part in zip(field_names, parts))