Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import io
  20 import itertools
  21 import json
  22 import locale
  23 import math
  24 import operator
  25 import os
  26 import platform
  27 import random
  28 import re
  29 import socket
  30 import ssl
  31 import subprocess
  32 import sys
  33 import tempfile
  34 import time
  35 import traceback
  36 import xml.etree.ElementTree
  37 import zlib
  38
  39 from .compat import (
  40     compat_HTMLParseError,
  41     compat_HTMLParser,
  42     compat_basestring,
  43     compat_chr,
  44     compat_cookiejar,
  45     compat_ctypes_WINFUNCTYPE,
  46     compat_etree_fromstring,
  47     compat_expanduser,
  48     compat_html_entities,
  49     compat_html_entities_html5,
  50     compat_http_client,
  51     compat_integer_types,
  52     compat_kwargs,
  53     compat_os_name,
  54     compat_parse_qs,
  55     compat_shlex_quote,
  56     compat_str,
  57     compat_struct_pack,
  58     compat_struct_unpack,
  59     compat_urllib_error,
  60     compat_urllib_parse,
  61     compat_urllib_parse_urlencode,
  62     compat_urllib_parse_urlparse,
  63     compat_urllib_parse_unquote_plus,
  64     compat_urllib_request,
  65     compat_urlparse,
  66     compat_xpath,
  67 )
  68
  69 from .socks import (
  70     ProxyType,
  71     sockssocket,
  72 )
  73
  74
  75 def register_socks_protocols():
  76     # "Register" SOCKS protocols
  77     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  78     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  79     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  80         if scheme not in compat_urlparse.uses_netloc:
  81             compat_urlparse.uses_netloc.append(scheme)
  82
  83
  84 # This is not clearly defined otherwise
  85 compiled_regex_type = type(re.compile(''))
  86
  87
  88 def random_user_agent():
  89     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  90     _CHROME_VERSIONS = (
  91         '74.0.3729.129',
  92         '76.0.3780.3',
  93         '76.0.3780.2',
  94         '74.0.3729.128',
  95         '76.0.3780.1',
  96         '76.0.3780.0',
  97         '75.0.3770.15',
  98         '74.0.3729.127',
  99         '74.0.3729.126',
 100         '76.0.3779.1',
 101         '76.0.3779.0',
 102         '75.0.3770.14',
 103         '74.0.3729.125',
 104         '76.0.3778.1',
 105         '76.0.3778.0',
 106         '75.0.3770.13',
 107         '74.0.3729.124',
 108         '74.0.3729.123',
 109         '73.0.3683.121',
 110         '76.0.3777.1',
 111         '76.0.3777.0',
 112         '75.0.3770.12',
 113         '74.0.3729.122',
 114         '76.0.3776.4',
 115         '75.0.3770.11',
 116         '74.0.3729.121',
 117         '76.0.3776.3',
 118         '76.0.3776.2',
 119         '73.0.3683.120',
 120         '74.0.3729.120',
 121         '74.0.3729.119',
 122         '74.0.3729.118',
 123         '76.0.3776.1',
 124         '76.0.3776.0',
 125         '76.0.3775.5',
 126         '75.0.3770.10',
 127         '74.0.3729.117',
 128         '76.0.3775.4',
 129         '76.0.3775.3',
 130         '74.0.3729.116',
 131         '75.0.3770.9',
 132         '76.0.3775.2',
 133         '76.0.3775.1',
 134         '76.0.3775.0',
 135         '75.0.3770.8',
 136         '74.0.3729.115',
 137         '74.0.3729.114',
 138         '76.0.3774.1',
 139         '76.0.3774.0',
 140         '75.0.3770.7',
 141         '74.0.3729.113',
 142         '74.0.3729.112',
 143         '74.0.3729.111',
 144         '76.0.3773.1',
 145         '76.0.3773.0',
 146         '75.0.3770.6',
 147         '74.0.3729.110',
 148         '74.0.3729.109',
 149         '76.0.3772.1',
 150         '76.0.3772.0',
 151         '75.0.3770.5',
 152         '74.0.3729.108',
 153         '74.0.3729.107',
 154         '76.0.3771.1',
 155         '76.0.3771.0',
 156         '75.0.3770.4',
 157         '74.0.3729.106',
 158         '74.0.3729.105',
 159         '75.0.3770.3',
 160         '74.0.3729.104',
 161         '74.0.3729.103',
 162         '74.0.3729.102',
 163         '75.0.3770.2',
 164         '74.0.3729.101',
 165         '75.0.3770.1',
 166         '75.0.3770.0',
 167         '74.0.3729.100',
 168         '75.0.3769.5',
 169         '75.0.3769.4',
 170         '74.0.3729.99',
 171         '75.0.3769.3',
 172         '75.0.3769.2',
 173         '75.0.3768.6',
 174         '74.0.3729.98',
 175         '75.0.3769.1',
 176         '75.0.3769.0',
 177         '74.0.3729.97',
 178         '73.0.3683.119',
 179         '73.0.3683.118',
 180         '74.0.3729.96',
 181         '75.0.3768.5',
 182         '75.0.3768.4',
 183         '75.0.3768.3',
 184         '75.0.3768.2',
 185         '74.0.3729.95',
 186         '74.0.3729.94',
 187         '75.0.3768.1',
 188         '75.0.3768.0',
 189         '74.0.3729.93',
 190         '74.0.3729.92',
 191         '73.0.3683.117',
 192         '74.0.3729.91',
 193         '75.0.3766.3',
 194         '74.0.3729.90',
 195         '75.0.3767.2',
 196         '75.0.3767.1',
 197         '75.0.3767.0',
 198         '74.0.3729.89',
 199         '73.0.3683.116',
 200         '75.0.3766.2',
 201         '74.0.3729.88',
 202         '75.0.3766.1',
 203         '75.0.3766.0',
 204         '74.0.3729.87',
 205         '73.0.3683.115',
 206         '74.0.3729.86',
 207         '75.0.3765.1',
 208         '75.0.3765.0',
 209         '74.0.3729.85',
 210         '73.0.3683.114',
 211         '74.0.3729.84',
 212         '75.0.3764.1',
 213         '75.0.3764.0',
 214         '74.0.3729.83',
 215         '73.0.3683.113',
 216         '75.0.3763.2',
 217         '75.0.3761.4',
 218         '74.0.3729.82',
 219         '75.0.3763.1',
 220         '75.0.3763.0',
 221         '74.0.3729.81',
 222         '73.0.3683.112',
 223         '75.0.3762.1',
 224         '75.0.3762.0',
 225         '74.0.3729.80',
 226         '75.0.3761.3',
 227         '74.0.3729.79',
 228         '73.0.3683.111',
 229         '75.0.3761.2',
 230         '74.0.3729.78',
 231         '74.0.3729.77',
 232         '75.0.3761.1',
 233         '75.0.3761.0',
 234         '73.0.3683.110',
 235         '74.0.3729.76',
 236         '74.0.3729.75',
 237         '75.0.3760.0',
 238         '74.0.3729.74',
 239         '75.0.3759.8',
 240         '75.0.3759.7',
 241         '75.0.3759.6',
 242         '74.0.3729.73',
 243         '75.0.3759.5',
 244         '74.0.3729.72',
 245         '73.0.3683.109',
 246         '75.0.3759.4',
 247         '75.0.3759.3',
 248         '74.0.3729.71',
 249         '75.0.3759.2',
 250         '74.0.3729.70',
 251         '73.0.3683.108',
 252         '74.0.3729.69',
 253         '75.0.3759.1',
 254         '75.0.3759.0',
 255         '74.0.3729.68',
 256         '73.0.3683.107',
 257         '74.0.3729.67',
 258         '75.0.3758.1',
 259         '75.0.3758.0',
 260         '74.0.3729.66',
 261         '73.0.3683.106',
 262         '74.0.3729.65',
 263         '75.0.3757.1',
 264         '75.0.3757.0',
 265         '74.0.3729.64',
 266         '73.0.3683.105',
 267         '74.0.3729.63',
 268         '75.0.3756.1',
 269         '75.0.3756.0',
 270         '74.0.3729.62',
 271         '73.0.3683.104',
 272         '75.0.3755.3',
 273         '75.0.3755.2',
 274         '73.0.3683.103',
 275         '75.0.3755.1',
 276         '75.0.3755.0',
 277         '74.0.3729.61',
 278         '73.0.3683.102',
 279         '74.0.3729.60',
 280         '75.0.3754.2',
 281         '74.0.3729.59',
 282         '75.0.3753.4',
 283         '74.0.3729.58',
 284         '75.0.3754.1',
 285         '75.0.3754.0',
 286         '74.0.3729.57',
 287         '73.0.3683.101',
 288         '75.0.3753.3',
 289         '75.0.3752.2',
 290         '75.0.3753.2',
 291         '74.0.3729.56',
 292         '75.0.3753.1',
 293         '75.0.3753.0',
 294         '74.0.3729.55',
 295         '73.0.3683.100',
 296         '74.0.3729.54',
 297         '75.0.3752.1',
 298         '75.0.3752.0',
 299         '74.0.3729.53',
 300         '73.0.3683.99',
 301         '74.0.3729.52',
 302         '75.0.3751.1',
 303         '75.0.3751.0',
 304         '74.0.3729.51',
 305         '73.0.3683.98',
 306         '74.0.3729.50',
 307         '75.0.3750.0',
 308         '74.0.3729.49',
 309         '74.0.3729.48',
 310         '74.0.3729.47',
 311         '75.0.3749.3',
 312         '74.0.3729.46',
 313         '73.0.3683.97',
 314         '75.0.3749.2',
 315         '74.0.3729.45',
 316         '75.0.3749.1',
 317         '75.0.3749.0',
 318         '74.0.3729.44',
 319         '73.0.3683.96',
 320         '74.0.3729.43',
 321         '74.0.3729.42',
 322         '75.0.3748.1',
 323         '75.0.3748.0',
 324         '74.0.3729.41',
 325         '75.0.3747.1',
 326         '73.0.3683.95',
 327         '75.0.3746.4',
 328         '74.0.3729.40',
 329         '74.0.3729.39',
 330         '75.0.3747.0',
 331         '75.0.3746.3',
 332         '75.0.3746.2',
 333         '74.0.3729.38',
 334         '75.0.3746.1',
 335         '75.0.3746.0',
 336         '74.0.3729.37',
 337         '73.0.3683.94',
 338         '75.0.3745.5',
 339         '75.0.3745.4',
 340         '75.0.3745.3',
 341         '75.0.3745.2',
 342         '74.0.3729.36',
 343         '75.0.3745.1',
 344         '75.0.3745.0',
 345         '75.0.3744.2',
 346         '74.0.3729.35',
 347         '73.0.3683.93',
 348         '74.0.3729.34',
 349         '75.0.3744.1',
 350         '75.0.3744.0',
 351         '74.0.3729.33',
 352         '73.0.3683.92',
 353         '74.0.3729.32',
 354         '74.0.3729.31',
 355         '73.0.3683.91',
 356         '75.0.3741.2',
 357         '75.0.3740.5',
 358         '74.0.3729.30',
 359         '75.0.3741.1',
 360         '75.0.3741.0',
 361         '74.0.3729.29',
 362         '75.0.3740.4',
 363         '73.0.3683.90',
 364         '74.0.3729.28',
 365         '75.0.3740.3',
 366         '73.0.3683.89',
 367         '75.0.3740.2',
 368         '74.0.3729.27',
 369         '75.0.3740.1',
 370         '75.0.3740.0',
 371         '74.0.3729.26',
 372         '73.0.3683.88',
 373         '73.0.3683.87',
 374         '74.0.3729.25',
 375         '75.0.3739.1',
 376         '75.0.3739.0',
 377         '73.0.3683.86',
 378         '74.0.3729.24',
 379         '73.0.3683.85',
 380         '75.0.3738.4',
 381         '75.0.3738.3',
 382         '75.0.3738.2',
 383         '75.0.3738.1',
 384         '75.0.3738.0',
 385         '74.0.3729.23',
 386         '73.0.3683.84',
 387         '74.0.3729.22',
 388         '74.0.3729.21',
 389         '75.0.3737.1',
 390         '75.0.3737.0',
 391         '74.0.3729.20',
 392         '73.0.3683.83',
 393         '74.0.3729.19',
 394         '75.0.3736.1',
 395         '75.0.3736.0',
 396         '74.0.3729.18',
 397         '73.0.3683.82',
 398         '74.0.3729.17',
 399         '75.0.3735.1',
 400         '75.0.3735.0',
 401         '74.0.3729.16',
 402         '73.0.3683.81',
 403         '75.0.3734.1',
 404         '75.0.3734.0',
 405         '74.0.3729.15',
 406         '73.0.3683.80',
 407         '74.0.3729.14',
 408         '75.0.3733.1',
 409         '75.0.3733.0',
 410         '75.0.3732.1',
 411         '74.0.3729.13',
 412         '74.0.3729.12',
 413         '73.0.3683.79',
 414         '74.0.3729.11',
 415         '75.0.3732.0',
 416         '74.0.3729.10',
 417         '73.0.3683.78',
 418         '74.0.3729.9',
 419         '74.0.3729.8',
 420         '74.0.3729.7',
 421         '75.0.3731.3',
 422         '75.0.3731.2',
 423         '75.0.3731.0',
 424         '74.0.3729.6',
 425         '73.0.3683.77',
 426         '73.0.3683.76',
 427         '75.0.3730.5',
 428         '75.0.3730.4',
 429         '73.0.3683.75',
 430         '74.0.3729.5',
 431         '73.0.3683.74',
 432         '75.0.3730.3',
 433         '75.0.3730.2',
 434         '74.0.3729.4',
 435         '73.0.3683.73',
 436         '73.0.3683.72',
 437         '75.0.3730.1',
 438         '75.0.3730.0',
 439         '74.0.3729.3',
 440         '73.0.3683.71',
 441         '74.0.3729.2',
 442         '73.0.3683.70',
 443         '74.0.3729.1',
 444         '74.0.3729.0',
 445         '74.0.3726.4',
 446         '73.0.3683.69',
 447         '74.0.3726.3',
 448         '74.0.3728.0',
 449         '74.0.3726.2',
 450         '73.0.3683.68',
 451         '74.0.3726.1',
 452         '74.0.3726.0',
 453         '74.0.3725.4',
 454         '73.0.3683.67',
 455         '73.0.3683.66',
 456         '74.0.3725.3',
 457         '74.0.3725.2',
 458         '74.0.3725.1',
 459         '74.0.3724.8',
 460         '74.0.3725.0',
 461         '73.0.3683.65',
 462         '74.0.3724.7',
 463         '74.0.3724.6',
 464         '74.0.3724.5',
 465         '74.0.3724.4',
 466         '74.0.3724.3',
 467         '74.0.3724.2',
 468         '74.0.3724.1',
 469         '74.0.3724.0',
 470         '73.0.3683.64',
 471         '74.0.3723.1',
 472         '74.0.3723.0',
 473         '73.0.3683.63',
 474         '74.0.3722.1',
 475         '74.0.3722.0',
 476         '73.0.3683.62',
 477         '74.0.3718.9',
 478         '74.0.3702.3',
 479         '74.0.3721.3',
 480         '74.0.3721.2',
 481         '74.0.3721.1',
 482         '74.0.3721.0',
 483         '74.0.3720.6',
 484         '73.0.3683.61',
 485         '72.0.3626.122',
 486         '73.0.3683.60',
 487         '74.0.3720.5',
 488         '72.0.3626.121',
 489         '74.0.3718.8',
 490         '74.0.3720.4',
 491         '74.0.3720.3',
 492         '74.0.3718.7',
 493         '74.0.3720.2',
 494         '74.0.3720.1',
 495         '74.0.3720.0',
 496         '74.0.3718.6',
 497         '74.0.3719.5',
 498         '73.0.3683.59',
 499         '74.0.3718.5',
 500         '74.0.3718.4',
 501         '74.0.3719.4',
 502         '74.0.3719.3',
 503         '74.0.3719.2',
 504         '74.0.3719.1',
 505         '73.0.3683.58',
 506         '74.0.3719.0',
 507         '73.0.3683.57',
 508         '73.0.3683.56',
 509         '74.0.3718.3',
 510         '73.0.3683.55',
 511         '74.0.3718.2',
 512         '74.0.3718.1',
 513         '74.0.3718.0',
 514         '73.0.3683.54',
 515         '74.0.3717.2',
 516         '73.0.3683.53',
 517         '74.0.3717.1',
 518         '74.0.3717.0',
 519         '73.0.3683.52',
 520         '74.0.3716.1',
 521         '74.0.3716.0',
 522         '73.0.3683.51',
 523         '74.0.3715.1',
 524         '74.0.3715.0',
 525         '73.0.3683.50',
 526         '74.0.3711.2',
 527         '74.0.3714.2',
 528         '74.0.3713.3',
 529         '74.0.3714.1',
 530         '74.0.3714.0',
 531         '73.0.3683.49',
 532         '74.0.3713.1',
 533         '74.0.3713.0',
 534         '72.0.3626.120',
 535         '73.0.3683.48',
 536         '74.0.3712.2',
 537         '74.0.3712.1',
 538         '74.0.3712.0',
 539         '73.0.3683.47',
 540         '72.0.3626.119',
 541         '73.0.3683.46',
 542         '74.0.3710.2',
 543         '72.0.3626.118',
 544         '74.0.3711.1',
 545         '74.0.3711.0',
 546         '73.0.3683.45',
 547         '72.0.3626.117',
 548         '74.0.3710.1',
 549         '74.0.3710.0',
 550         '73.0.3683.44',
 551         '72.0.3626.116',
 552         '74.0.3709.1',
 553         '74.0.3709.0',
 554         '74.0.3704.9',
 555         '73.0.3683.43',
 556         '72.0.3626.115',
 557         '74.0.3704.8',
 558         '74.0.3704.7',
 559         '74.0.3708.0',
 560         '74.0.3706.7',
 561         '74.0.3704.6',
 562         '73.0.3683.42',
 563         '72.0.3626.114',
 564         '74.0.3706.6',
 565         '72.0.3626.113',
 566         '74.0.3704.5',
 567         '74.0.3706.5',
 568         '74.0.3706.4',
 569         '74.0.3706.3',
 570         '74.0.3706.2',
 571         '74.0.3706.1',
 572         '74.0.3706.0',
 573         '73.0.3683.41',
 574         '72.0.3626.112',
 575         '74.0.3705.1',
 576         '74.0.3705.0',
 577         '73.0.3683.40',
 578         '72.0.3626.111',
 579         '73.0.3683.39',
 580         '74.0.3704.4',
 581         '73.0.3683.38',
 582         '74.0.3704.3',
 583         '74.0.3704.2',
 584         '74.0.3704.1',
 585         '74.0.3704.0',
 586         '73.0.3683.37',
 587         '72.0.3626.110',
 588         '72.0.3626.109',
 589         '74.0.3703.3',
 590         '74.0.3703.2',
 591         '73.0.3683.36',
 592         '74.0.3703.1',
 593         '74.0.3703.0',
 594         '73.0.3683.35',
 595         '72.0.3626.108',
 596         '74.0.3702.2',
 597         '74.0.3699.3',
 598         '74.0.3702.1',
 599         '74.0.3702.0',
 600         '73.0.3683.34',
 601         '72.0.3626.107',
 602         '73.0.3683.33',
 603         '74.0.3701.1',
 604         '74.0.3701.0',
 605         '73.0.3683.32',
 606         '73.0.3683.31',
 607         '72.0.3626.105',
 608         '74.0.3700.1',
 609         '74.0.3700.0',
 610         '73.0.3683.29',
 611         '72.0.3626.103',
 612         '74.0.3699.2',
 613         '74.0.3699.1',
 614         '74.0.3699.0',
 615         '73.0.3683.28',
 616         '72.0.3626.102',
 617         '73.0.3683.27',
 618         '73.0.3683.26',
 619         '74.0.3698.0',
 620         '74.0.3696.2',
 621         '72.0.3626.101',
 622         '73.0.3683.25',
 623         '74.0.3696.1',
 624         '74.0.3696.0',
 625         '74.0.3694.8',
 626         '72.0.3626.100',
 627         '74.0.3694.7',
 628         '74.0.3694.6',
 629         '74.0.3694.5',
 630         '74.0.3694.4',
 631         '72.0.3626.99',
 632         '72.0.3626.98',
 633         '74.0.3694.3',
 634         '73.0.3683.24',
 635         '72.0.3626.97',
 636         '72.0.3626.96',
 637         '72.0.3626.95',
 638         '73.0.3683.23',
 639         '72.0.3626.94',
 640         '73.0.3683.22',
 641         '73.0.3683.21',
 642         '72.0.3626.93',
 643         '74.0.3694.2',
 644         '72.0.3626.92',
 645         '74.0.3694.1',
 646         '74.0.3694.0',
 647         '74.0.3693.6',
 648         '73.0.3683.20',
 649         '72.0.3626.91',
 650         '74.0.3693.5',
 651         '74.0.3693.4',
 652         '74.0.3693.3',
 653         '74.0.3693.2',
 654         '73.0.3683.19',
 655         '74.0.3693.1',
 656         '74.0.3693.0',
 657         '73.0.3683.18',
 658         '72.0.3626.90',
 659         '74.0.3692.1',
 660         '74.0.3692.0',
 661         '73.0.3683.17',
 662         '72.0.3626.89',
 663         '74.0.3687.3',
 664         '74.0.3691.1',
 665         '74.0.3691.0',
 666         '73.0.3683.16',
 667         '72.0.3626.88',
 668         '72.0.3626.87',
 669         '73.0.3683.15',
 670         '74.0.3690.1',
 671         '74.0.3690.0',
 672         '73.0.3683.14',
 673         '72.0.3626.86',
 674         '73.0.3683.13',
 675         '73.0.3683.12',
 676         '74.0.3689.1',
 677         '74.0.3689.0',
 678         '73.0.3683.11',
 679         '72.0.3626.85',
 680         '73.0.3683.10',
 681         '72.0.3626.84',
 682         '73.0.3683.9',
 683         '74.0.3688.1',
 684         '74.0.3688.0',
 685         '73.0.3683.8',
 686         '72.0.3626.83',
 687         '74.0.3687.2',
 688         '74.0.3687.1',
 689         '74.0.3687.0',
 690         '73.0.3683.7',
 691         '72.0.3626.82',
 692         '74.0.3686.4',
 693         '72.0.3626.81',
 694         '74.0.3686.3',
 695         '74.0.3686.2',
 696         '74.0.3686.1',
 697         '74.0.3686.0',
 698         '73.0.3683.6',
 699         '72.0.3626.80',
 700         '74.0.3685.1',
 701         '74.0.3685.0',
 702         '73.0.3683.5',
 703         '72.0.3626.79',
 704         '74.0.3684.1',
 705         '74.0.3684.0',
 706         '73.0.3683.4',
 707         '72.0.3626.78',
 708         '72.0.3626.77',
 709         '73.0.3683.3',
 710         '73.0.3683.2',
 711         '72.0.3626.76',
 712         '73.0.3683.1',
 713         '73.0.3683.0',
 714         '72.0.3626.75',
 715         '71.0.3578.141',
 716         '73.0.3682.1',
 717         '73.0.3682.0',
 718         '72.0.3626.74',
 719         '71.0.3578.140',
 720         '73.0.3681.4',
 721         '73.0.3681.3',
 722         '73.0.3681.2',
 723         '73.0.3681.1',
 724         '73.0.3681.0',
 725         '72.0.3626.73',
 726         '71.0.3578.139',
 727         '72.0.3626.72',
 728         '72.0.3626.71',
 729         '73.0.3680.1',
 730         '73.0.3680.0',
 731         '72.0.3626.70',
 732         '71.0.3578.138',
 733         '73.0.3678.2',
 734         '73.0.3679.1',
 735         '73.0.3679.0',
 736         '72.0.3626.69',
 737         '71.0.3578.137',
 738         '73.0.3678.1',
 739         '73.0.3678.0',
 740         '71.0.3578.136',
 741         '73.0.3677.1',
 742         '73.0.3677.0',
 743         '72.0.3626.68',
 744         '72.0.3626.67',
 745         '71.0.3578.135',
 746         '73.0.3676.1',
 747         '73.0.3676.0',
 748         '73.0.3674.2',
 749         '72.0.3626.66',
 750         '71.0.3578.134',
 751         '73.0.3674.1',
 752         '73.0.3674.0',
 753         '72.0.3626.65',
 754         '71.0.3578.133',
 755         '73.0.3673.2',
 756         '73.0.3673.1',
 757         '73.0.3673.0',
 758         '72.0.3626.64',
 759         '71.0.3578.132',
 760         '72.0.3626.63',
 761         '72.0.3626.62',
 762         '72.0.3626.61',
 763         '72.0.3626.60',
 764         '73.0.3672.1',
 765         '73.0.3672.0',
 766         '72.0.3626.59',
 767         '71.0.3578.131',
 768         '73.0.3671.3',
 769         '73.0.3671.2',
 770         '73.0.3671.1',
 771         '73.0.3671.0',
 772         '72.0.3626.58',
 773         '71.0.3578.130',
 774         '73.0.3670.1',
 775         '73.0.3670.0',
 776         '72.0.3626.57',
 777         '71.0.3578.129',
 778         '73.0.3669.1',
 779         '73.0.3669.0',
 780         '72.0.3626.56',
 781         '71.0.3578.128',
 782         '73.0.3668.2',
 783         '73.0.3668.1',
 784         '73.0.3668.0',
 785         '72.0.3626.55',
 786         '71.0.3578.127',
 787         '73.0.3667.2',
 788         '73.0.3667.1',
 789         '73.0.3667.0',
 790         '72.0.3626.54',
 791         '71.0.3578.126',
 792         '73.0.3666.1',
 793         '73.0.3666.0',
 794         '72.0.3626.53',
 795         '71.0.3578.125',
 796         '73.0.3665.4',
 797         '73.0.3665.3',
 798         '72.0.3626.52',
 799         '73.0.3665.2',
 800         '73.0.3664.4',
 801         '73.0.3665.1',
 802         '73.0.3665.0',
 803         '72.0.3626.51',
 804         '71.0.3578.124',
 805         '72.0.3626.50',
 806         '73.0.3664.3',
 807         '73.0.3664.2',
 808         '73.0.3664.1',
 809         '73.0.3664.0',
 810         '73.0.3663.2',
 811         '72.0.3626.49',
 812         '71.0.3578.123',
 813         '73.0.3663.1',
 814         '73.0.3663.0',
 815         '72.0.3626.48',
 816         '71.0.3578.122',
 817         '73.0.3662.1',
 818         '73.0.3662.0',
 819         '72.0.3626.47',
 820         '71.0.3578.121',
 821         '73.0.3661.1',
 822         '72.0.3626.46',
 823         '73.0.3661.0',
 824         '72.0.3626.45',
 825         '71.0.3578.120',
 826         '73.0.3660.2',
 827         '73.0.3660.1',
 828         '73.0.3660.0',
 829         '72.0.3626.44',
 830         '71.0.3578.119',
 831         '73.0.3659.1',
 832         '73.0.3659.0',
 833         '72.0.3626.43',
 834         '71.0.3578.118',
 835         '73.0.3658.1',
 836         '73.0.3658.0',
 837         '72.0.3626.42',
 838         '71.0.3578.117',
 839         '73.0.3657.1',
 840         '73.0.3657.0',
 841         '72.0.3626.41',
 842         '71.0.3578.116',
 843         '73.0.3656.1',
 844         '73.0.3656.0',
 845         '72.0.3626.40',
 846         '71.0.3578.115',
 847         '73.0.3655.1',
 848         '73.0.3655.0',
 849         '72.0.3626.39',
 850         '71.0.3578.114',
 851         '73.0.3654.1',
 852         '73.0.3654.0',
 853         '72.0.3626.38',
 854         '71.0.3578.113',
 855         '73.0.3653.1',
 856         '73.0.3653.0',
 857         '72.0.3626.37',
 858         '71.0.3578.112',
 859         '73.0.3652.1',
 860         '73.0.3652.0',
 861         '72.0.3626.36',
 862         '71.0.3578.111',
 863         '73.0.3651.1',
 864         '73.0.3651.0',
 865         '72.0.3626.35',
 866         '71.0.3578.110',
 867         '73.0.3650.1',
 868         '73.0.3650.0',
 869         '72.0.3626.34',
 870         '71.0.3578.109',
 871         '73.0.3649.1',
 872         '73.0.3649.0',
 873         '72.0.3626.33',
 874         '71.0.3578.108',
 875         '73.0.3648.2',
 876         '73.0.3648.1',
 877         '73.0.3648.0',
 878         '72.0.3626.32',
 879         '71.0.3578.107',
 880         '73.0.3647.2',
 881         '73.0.3647.1',
 882         '73.0.3647.0',
 883         '72.0.3626.31',
 884         '71.0.3578.106',
 885         '73.0.3635.3',
 886         '73.0.3646.2',
 887         '73.0.3646.1',
 888         '73.0.3646.0',
 889         '72.0.3626.30',
 890         '71.0.3578.105',
 891         '72.0.3626.29',
 892         '73.0.3645.2',
 893         '73.0.3645.1',
 894         '73.0.3645.0',
 895         '72.0.3626.28',
 896         '71.0.3578.104',
 897         '72.0.3626.27',
 898         '72.0.3626.26',
 899         '72.0.3626.25',
 900         '72.0.3626.24',
 901         '73.0.3644.0',
 902         '73.0.3643.2',
 903         '72.0.3626.23',
 904         '71.0.3578.103',
 905         '73.0.3643.1',
 906         '73.0.3643.0',
 907         '72.0.3626.22',
 908         '71.0.3578.102',
 909         '73.0.3642.1',
 910         '73.0.3642.0',
 911         '72.0.3626.21',
 912         '71.0.3578.101',
 913         '73.0.3641.1',
 914         '73.0.3641.0',
 915         '72.0.3626.20',
 916         '71.0.3578.100',
 917         '72.0.3626.19',
 918         '73.0.3640.1',
 919         '73.0.3640.0',
 920         '72.0.3626.18',
 921         '73.0.3639.1',
 922         '71.0.3578.99',
 923         '73.0.3639.0',
 924         '72.0.3626.17',
 925         '73.0.3638.2',
 926         '72.0.3626.16',
 927         '73.0.3638.1',
 928         '73.0.3638.0',
 929         '72.0.3626.15',
 930         '71.0.3578.98',
 931         '73.0.3635.2',
 932         '71.0.3578.97',
 933         '73.0.3637.1',
 934         '73.0.3637.0',
 935         '72.0.3626.14',
 936         '71.0.3578.96',
 937         '71.0.3578.95',
 938         '72.0.3626.13',
 939         '71.0.3578.94',
 940         '73.0.3636.2',
 941         '71.0.3578.93',
 942         '73.0.3636.1',
 943         '73.0.3636.0',
 944         '72.0.3626.12',
 945         '71.0.3578.92',
 946         '73.0.3635.1',
 947         '73.0.3635.0',
 948         '72.0.3626.11',
 949         '71.0.3578.91',
 950         '73.0.3634.2',
 951         '73.0.3634.1',
 952         '73.0.3634.0',
 953         '72.0.3626.10',
 954         '71.0.3578.90',
 955         '71.0.3578.89',
 956         '73.0.3633.2',
 957         '73.0.3633.1',
 958         '73.0.3633.0',
 959         '72.0.3610.4',
 960         '72.0.3626.9',
 961         '71.0.3578.88',
 962         '73.0.3632.5',
 963         '73.0.3632.4',
 964         '73.0.3632.3',
 965         '73.0.3632.2',
 966         '73.0.3632.1',
 967         '73.0.3632.0',
 968         '72.0.3626.8',
 969         '71.0.3578.87',
 970         '73.0.3631.2',
 971         '73.0.3631.1',
 972         '73.0.3631.0',
 973         '72.0.3626.7',
 974         '71.0.3578.86',
 975         '72.0.3626.6',
 976         '73.0.3630.1',
 977         '73.0.3630.0',
 978         '72.0.3626.5',
 979         '71.0.3578.85',
 980         '72.0.3626.4',
 981         '73.0.3628.3',
 982         '73.0.3628.2',
 983         '73.0.3629.1',
 984         '73.0.3629.0',
 985         '72.0.3626.3',
 986         '71.0.3578.84',
 987         '73.0.3628.1',
 988         '73.0.3628.0',
 989         '71.0.3578.83',
 990         '73.0.3627.1',
 991         '73.0.3627.0',
 992         '72.0.3626.2',
 993         '71.0.3578.82',
 994         '71.0.3578.81',
 995         '71.0.3578.80',
 996         '72.0.3626.1',
 997         '72.0.3626.0',
 998         '71.0.3578.79',
 999         '70.0.3538.124',
1000         '71.0.3578.78',
1001         '72.0.3623.4',
1002         '72.0.3625.2',
1003         '72.0.3625.1',
1004         '72.0.3625.0',
1005         '71.0.3578.77',
1006         '70.0.3538.123',
1007         '72.0.3624.4',
1008         '72.0.3624.3',
1009         '72.0.3624.2',
1010         '71.0.3578.76',
1011         '72.0.3624.1',
1012         '72.0.3624.0',
1013         '72.0.3623.3',
1014         '71.0.3578.75',
1015         '70.0.3538.122',
1016         '71.0.3578.74',
1017         '72.0.3623.2',
1018         '72.0.3610.3',
1019         '72.0.3623.1',
1020         '72.0.3623.0',
1021         '72.0.3622.3',
1022         '72.0.3622.2',
1023         '71.0.3578.73',
1024         '70.0.3538.121',
1025         '72.0.3622.1',
1026         '72.0.3622.0',
1027         '71.0.3578.72',
1028         '70.0.3538.120',
1029         '72.0.3621.1',
1030         '72.0.3621.0',
1031         '71.0.3578.71',
1032         '70.0.3538.119',
1033         '72.0.3620.1',
1034         '72.0.3620.0',
1035         '71.0.3578.70',
1036         '70.0.3538.118',
1037         '71.0.3578.69',
1038         '72.0.3619.1',
1039         '72.0.3619.0',
1040         '71.0.3578.68',
1041         '70.0.3538.117',
1042         '71.0.3578.67',
1043         '72.0.3618.1',
1044         '72.0.3618.0',
1045         '71.0.3578.66',
1046         '70.0.3538.116',
1047         '72.0.3617.1',
1048         '72.0.3617.0',
1049         '71.0.3578.65',
1050         '70.0.3538.115',
1051         '72.0.3602.3',
1052         '71.0.3578.64',
1053         '72.0.3616.1',
1054         '72.0.3616.0',
1055         '71.0.3578.63',
1056         '70.0.3538.114',
1057         '71.0.3578.62',
1058         '72.0.3615.1',
1059         '72.0.3615.0',
1060         '71.0.3578.61',
1061         '70.0.3538.113',
1062         '72.0.3614.1',
1063         '72.0.3614.0',
1064         '71.0.3578.60',
1065         '70.0.3538.112',
1066         '72.0.3613.1',
1067         '72.0.3613.0',
1068         '71.0.3578.59',
1069         '70.0.3538.111',
1070         '72.0.3612.2',
1071         '72.0.3612.1',
1072         '72.0.3612.0',
1073         '70.0.3538.110',
1074         '71.0.3578.58',
1075         '70.0.3538.109',
1076         '72.0.3611.2',
1077         '72.0.3611.1',
1078         '72.0.3611.0',
1079         '71.0.3578.57',
1080         '70.0.3538.108',
1081         '72.0.3610.2',
1082         '71.0.3578.56',
1083         '71.0.3578.55',
1084         '72.0.3610.1',
1085         '72.0.3610.0',
1086         '71.0.3578.54',
1087         '70.0.3538.107',
1088         '71.0.3578.53',
1089         '72.0.3609.3',
1090         '71.0.3578.52',
1091         '72.0.3609.2',
1092         '71.0.3578.51',
1093         '72.0.3608.5',
1094         '72.0.3609.1',
1095         '72.0.3609.0',
1096         '71.0.3578.50',
1097         '70.0.3538.106',
1098         '72.0.3608.4',
1099         '72.0.3608.3',
1100         '72.0.3608.2',
1101         '71.0.3578.49',
1102         '72.0.3608.1',
1103         '72.0.3608.0',
1104         '70.0.3538.105',
1105         '71.0.3578.48',
1106         '72.0.3607.1',
1107         '72.0.3607.0',
1108         '71.0.3578.47',
1109         '70.0.3538.104',
1110         '72.0.3606.2',
1111         '72.0.3606.1',
1112         '72.0.3606.0',
1113         '71.0.3578.46',
1114         '70.0.3538.103',
1115         '70.0.3538.102',
1116         '72.0.3605.3',
1117         '72.0.3605.2',
1118         '72.0.3605.1',
1119         '72.0.3605.0',
1120         '71.0.3578.45',
1121         '70.0.3538.101',
1122         '71.0.3578.44',
1123         '71.0.3578.43',
1124         '70.0.3538.100',
1125         '70.0.3538.99',
1126         '71.0.3578.42',
1127         '72.0.3604.1',
1128         '72.0.3604.0',
1129         '71.0.3578.41',
1130         '70.0.3538.98',
1131         '71.0.3578.40',
1132         '72.0.3603.2',
1133         '72.0.3603.1',
1134         '72.0.3603.0',
1135         '71.0.3578.39',
1136         '70.0.3538.97',
1137         '72.0.3602.2',
1138         '71.0.3578.38',
1139         '71.0.3578.37',
1140         '72.0.3602.1',
1141         '72.0.3602.0',
1142         '71.0.3578.36',
1143         '70.0.3538.96',
1144         '72.0.3601.1',
1145         '72.0.3601.0',
1146         '71.0.3578.35',
1147         '70.0.3538.95',
1148         '72.0.3600.1',
1149         '72.0.3600.0',
1150         '71.0.3578.34',
1151         '70.0.3538.94',
1152         '72.0.3599.3',
1153         '72.0.3599.2',
1154         '72.0.3599.1',
1155         '72.0.3599.0',
1156         '71.0.3578.33',
1157         '70.0.3538.93',
1158         '72.0.3598.1',
1159         '72.0.3598.0',
1160         '71.0.3578.32',
1161         '70.0.3538.87',
1162         '72.0.3597.1',
1163         '72.0.3597.0',
1164         '72.0.3596.2',
1165         '71.0.3578.31',
1166         '70.0.3538.86',
1167         '71.0.3578.30',
1168         '71.0.3578.29',
1169         '72.0.3596.1',
1170         '72.0.3596.0',
1171         '71.0.3578.28',
1172         '70.0.3538.85',
1173         '72.0.3595.2',
1174         '72.0.3591.3',
1175         '72.0.3595.1',
1176         '72.0.3595.0',
1177         '71.0.3578.27',
1178         '70.0.3538.84',
1179         '72.0.3594.1',
1180         '72.0.3594.0',
1181         '71.0.3578.26',
1182         '70.0.3538.83',
1183         '72.0.3593.2',
1184         '72.0.3593.1',
1185         '72.0.3593.0',
1186         '71.0.3578.25',
1187         '70.0.3538.82',
1188         '72.0.3589.3',
1189         '72.0.3592.2',
1190         '72.0.3592.1',
1191         '72.0.3592.0',
1192         '71.0.3578.24',
1193         '72.0.3589.2',
1194         '70.0.3538.81',
1195         '70.0.3538.80',
1196         '72.0.3591.2',
1197         '72.0.3591.1',
1198         '72.0.3591.0',
1199         '71.0.3578.23',
1200         '70.0.3538.79',
1201         '71.0.3578.22',
1202         '72.0.3590.1',
1203         '72.0.3590.0',
1204         '71.0.3578.21',
1205         '70.0.3538.78',
1206         '70.0.3538.77',
1207         '72.0.3589.1',
1208         '72.0.3589.0',
1209         '71.0.3578.20',
1210         '70.0.3538.76',
1211         '71.0.3578.19',
1212         '70.0.3538.75',
1213         '72.0.3588.1',
1214         '72.0.3588.0',
1215         '71.0.3578.18',
1216         '70.0.3538.74',
1217         '72.0.3586.2',
1218         '72.0.3587.0',
1219         '71.0.3578.17',
1220         '70.0.3538.73',
1221         '72.0.3586.1',
1222         '72.0.3586.0',
1223         '71.0.3578.16',
1224         '70.0.3538.72',
1225         '72.0.3585.1',
1226         '72.0.3585.0',
1227         '71.0.3578.15',
1228         '70.0.3538.71',
1229         '71.0.3578.14',
1230         '72.0.3584.1',
1231         '72.0.3584.0',
1232         '71.0.3578.13',
1233         '70.0.3538.70',
1234         '72.0.3583.2',
1235         '71.0.3578.12',
1236         '72.0.3583.1',
1237         '72.0.3583.0',
1238         '71.0.3578.11',
1239         '70.0.3538.69',
1240         '71.0.3578.10',
1241         '72.0.3582.0',
1242         '72.0.3581.4',
1243         '71.0.3578.9',
1244         '70.0.3538.67',
1245         '72.0.3581.3',
1246         '72.0.3581.2',
1247         '72.0.3581.1',
1248         '72.0.3581.0',
1249         '71.0.3578.8',
1250         '70.0.3538.66',
1251         '72.0.3580.1',
1252         '72.0.3580.0',
1253         '71.0.3578.7',
1254         '70.0.3538.65',
1255         '71.0.3578.6',
1256         '72.0.3579.1',
1257         '72.0.3579.0',
1258         '71.0.3578.5',
1259         '70.0.3538.64',
1260         '71.0.3578.4',
1261         '71.0.3578.3',
1262         '71.0.3578.2',
1263         '71.0.3578.1',
1264         '71.0.3578.0',
1265         '70.0.3538.63',
1266         '69.0.3497.128',
1267         '70.0.3538.62',
1268         '70.0.3538.61',
1269         '70.0.3538.60',
1270         '70.0.3538.59',
1271         '71.0.3577.1',
1272         '71.0.3577.0',
1273         '70.0.3538.58',
1274         '69.0.3497.127',
1275         '71.0.3576.2',
1276         '71.0.3576.1',
1277         '71.0.3576.0',
1278         '70.0.3538.57',
1279         '70.0.3538.56',
1280         '71.0.3575.2',
1281         '70.0.3538.55',
1282         '69.0.3497.126',
1283         '70.0.3538.54',
1284         '71.0.3575.1',
1285         '71.0.3575.0',
1286         '71.0.3574.1',
1287         '71.0.3574.0',
1288         '70.0.3538.53',
1289         '69.0.3497.125',
1290         '70.0.3538.52',
1291         '71.0.3573.1',
1292         '71.0.3573.0',
1293         '70.0.3538.51',
1294         '69.0.3497.124',
1295         '71.0.3572.1',
1296         '71.0.3572.0',
1297         '70.0.3538.50',
1298         '69.0.3497.123',
1299         '71.0.3571.2',
1300         '70.0.3538.49',
1301         '69.0.3497.122',
1302         '71.0.3571.1',
1303         '71.0.3571.0',
1304         '70.0.3538.48',
1305         '69.0.3497.121',
1306         '71.0.3570.1',
1307         '71.0.3570.0',
1308         '70.0.3538.47',
1309         '69.0.3497.120',
1310         '71.0.3568.2',
1311         '71.0.3569.1',
1312         '71.0.3569.0',
1313         '70.0.3538.46',
1314         '69.0.3497.119',
1315         '70.0.3538.45',
1316         '71.0.3568.1',
1317         '71.0.3568.0',
1318         '70.0.3538.44',
1319         '69.0.3497.118',
1320         '70.0.3538.43',
1321         '70.0.3538.42',
1322         '71.0.3567.1',
1323         '71.0.3567.0',
1324         '70.0.3538.41',
1325         '69.0.3497.117',
1326         '71.0.3566.1',
1327         '71.0.3566.0',
1328         '70.0.3538.40',
1329         '69.0.3497.116',
1330         '71.0.3565.1',
1331         '71.0.3565.0',
1332         '70.0.3538.39',
1333         '69.0.3497.115',
1334         '71.0.3564.1',
1335         '71.0.3564.0',
1336         '70.0.3538.38',
1337         '69.0.3497.114',
1338         '71.0.3563.0',
1339         '71.0.3562.2',
1340         '70.0.3538.37',
1341         '69.0.3497.113',
1342         '70.0.3538.36',
1343         '70.0.3538.35',
1344         '71.0.3562.1',
1345         '71.0.3562.0',
1346         '70.0.3538.34',
1347         '69.0.3497.112',
1348         '70.0.3538.33',
1349         '71.0.3561.1',
1350         '71.0.3561.0',
1351         '70.0.3538.32',
1352         '69.0.3497.111',
1353         '71.0.3559.6',
1354         '71.0.3560.1',
1355         '71.0.3560.0',
1356         '71.0.3559.5',
1357         '71.0.3559.4',
1358         '70.0.3538.31',
1359         '69.0.3497.110',
1360         '71.0.3559.3',
1361         '70.0.3538.30',
1362         '69.0.3497.109',
1363         '71.0.3559.2',
1364         '71.0.3559.1',
1365         '71.0.3559.0',
1366         '70.0.3538.29',
1367         '69.0.3497.108',
1368         '71.0.3558.2',
1369         '71.0.3558.1',
1370         '71.0.3558.0',
1371         '70.0.3538.28',
1372         '69.0.3497.107',
1373         '71.0.3557.2',
1374         '71.0.3557.1',
1375         '71.0.3557.0',
1376         '70.0.3538.27',
1377         '69.0.3497.106',
1378         '71.0.3554.4',
1379         '70.0.3538.26',
1380         '71.0.3556.1',
1381         '71.0.3556.0',
1382         '70.0.3538.25',
1383         '71.0.3554.3',
1384         '69.0.3497.105',
1385         '71.0.3554.2',
1386         '70.0.3538.24',
1387         '69.0.3497.104',
1388         '71.0.3555.2',
1389         '70.0.3538.23',
1390         '71.0.3555.1',
1391         '71.0.3555.0',
1392         '70.0.3538.22',
1393         '69.0.3497.103',
1394         '71.0.3554.1',
1395         '71.0.3554.0',
1396         '70.0.3538.21',
1397         '69.0.3497.102',
1398         '71.0.3553.3',
1399         '70.0.3538.20',
1400         '69.0.3497.101',
1401         '71.0.3553.2',
1402         '69.0.3497.100',
1403         '71.0.3553.1',
1404         '71.0.3553.0',
1405         '70.0.3538.19',
1406         '69.0.3497.99',
1407         '69.0.3497.98',
1408         '69.0.3497.97',
1409         '71.0.3552.6',
1410         '71.0.3552.5',
1411         '71.0.3552.4',
1412         '71.0.3552.3',
1413         '71.0.3552.2',
1414         '71.0.3552.1',
1415         '71.0.3552.0',
1416         '70.0.3538.18',
1417         '69.0.3497.96',
1418         '71.0.3551.3',
1419         '71.0.3551.2',
1420         '71.0.3551.1',
1421         '71.0.3551.0',
1422         '70.0.3538.17',
1423         '69.0.3497.95',
1424         '71.0.3550.3',
1425         '71.0.3550.2',
1426         '71.0.3550.1',
1427         '71.0.3550.0',
1428         '70.0.3538.16',
1429         '69.0.3497.94',
1430         '71.0.3549.1',
1431         '71.0.3549.0',
1432         '70.0.3538.15',
1433         '69.0.3497.93',
1434         '69.0.3497.92',
1435         '71.0.3548.1',
1436         '71.0.3548.0',
1437         '70.0.3538.14',
1438         '69.0.3497.91',
1439         '71.0.3547.1',
1440         '71.0.3547.0',
1441         '70.0.3538.13',
1442         '69.0.3497.90',
1443         '71.0.3546.2',
1444         '69.0.3497.89',
1445         '71.0.3546.1',
1446         '71.0.3546.0',
1447         '70.0.3538.12',
1448         '69.0.3497.88',
1449         '71.0.3545.4',
1450         '71.0.3545.3',
1451         '71.0.3545.2',
1452         '71.0.3545.1',
1453         '71.0.3545.0',
1454         '70.0.3538.11',
1455         '69.0.3497.87',
1456         '71.0.3544.5',
1457         '71.0.3544.4',
1458         '71.0.3544.3',
1459         '71.0.3544.2',
1460         '71.0.3544.1',
1461         '71.0.3544.0',
1462         '69.0.3497.86',
1463         '70.0.3538.10',
1464         '69.0.3497.85',
1465         '70.0.3538.9',
1466         '69.0.3497.84',
1467         '71.0.3543.4',
1468         '70.0.3538.8',
1469         '71.0.3543.3',
1470         '71.0.3543.2',
1471         '71.0.3543.1',
1472         '71.0.3543.0',
1473         '70.0.3538.7',
1474         '69.0.3497.83',
1475         '71.0.3542.2',
1476         '71.0.3542.1',
1477         '71.0.3542.0',
1478         '70.0.3538.6',
1479         '69.0.3497.82',
1480         '69.0.3497.81',
1481         '71.0.3541.1',
1482         '71.0.3541.0',
1483         '70.0.3538.5',
1484         '69.0.3497.80',
1485         '71.0.3540.1',
1486         '71.0.3540.0',
1487         '70.0.3538.4',
1488         '69.0.3497.79',
1489         '70.0.3538.3',
1490         '71.0.3539.1',
1491         '71.0.3539.0',
1492         '69.0.3497.78',
1493         '68.0.3440.134',
1494         '69.0.3497.77',
1495         '70.0.3538.2',
1496         '70.0.3538.1',
1497         '70.0.3538.0',
1498         '69.0.3497.76',
1499         '68.0.3440.133',
1500         '69.0.3497.75',
1501         '70.0.3537.2',
1502         '70.0.3537.1',
1503         '70.0.3537.0',
1504         '69.0.3497.74',
1505         '68.0.3440.132',
1506         '70.0.3536.0',
1507         '70.0.3535.5',
1508         '70.0.3535.4',
1509         '70.0.3535.3',
1510         '69.0.3497.73',
1511         '68.0.3440.131',
1512         '70.0.3532.8',
1513         '70.0.3532.7',
1514         '69.0.3497.72',
1515         '69.0.3497.71',
1516         '70.0.3535.2',
1517         '70.0.3535.1',
1518         '70.0.3535.0',
1519         '69.0.3497.70',
1520         '68.0.3440.130',
1521         '69.0.3497.69',
1522         '68.0.3440.129',
1523         '70.0.3534.4',
1524         '70.0.3534.3',
1525         '70.0.3534.2',
1526         '70.0.3534.1',
1527         '70.0.3534.0',
1528         '69.0.3497.68',
1529         '68.0.3440.128',
1530         '70.0.3533.2',
1531         '70.0.3533.1',
1532         '70.0.3533.0',
1533         '69.0.3497.67',
1534         '68.0.3440.127',
1535         '70.0.3532.6',
1536         '70.0.3532.5',
1537         '70.0.3532.4',
1538         '69.0.3497.66',
1539         '68.0.3440.126',
1540         '70.0.3532.3',
1541         '70.0.3532.2',
1542         '70.0.3532.1',
1543         '69.0.3497.60',
1544         '69.0.3497.65',
1545         '69.0.3497.64',
1546         '70.0.3532.0',
1547         '70.0.3531.0',
1548         '70.0.3530.4',
1549         '70.0.3530.3',
1550         '70.0.3530.2',
1551         '69.0.3497.58',
1552         '68.0.3440.125',
1553         '69.0.3497.57',
1554         '69.0.3497.56',
1555         '69.0.3497.55',
1556         '69.0.3497.54',
1557         '70.0.3530.1',
1558         '70.0.3530.0',
1559         '69.0.3497.53',
1560         '68.0.3440.124',
1561         '69.0.3497.52',
1562         '70.0.3529.3',
1563         '70.0.3529.2',
1564         '70.0.3529.1',
1565         '70.0.3529.0',
1566         '69.0.3497.51',
1567         '70.0.3528.4',
1568         '68.0.3440.123',
1569         '70.0.3528.3',
1570         '70.0.3528.2',
1571         '70.0.3528.1',
1572         '70.0.3528.0',
1573         '69.0.3497.50',
1574         '68.0.3440.122',
1575         '70.0.3527.1',
1576         '70.0.3527.0',
1577         '69.0.3497.49',
1578         '68.0.3440.121',
1579         '70.0.3526.1',
1580         '70.0.3526.0',
1581         '68.0.3440.120',
1582         '69.0.3497.48',
1583         '69.0.3497.47',
1584         '68.0.3440.119',
1585         '68.0.3440.118',
1586         '70.0.3525.5',
1587         '70.0.3525.4',
1588         '70.0.3525.3',
1589         '68.0.3440.117',
1590         '69.0.3497.46',
1591         '70.0.3525.2',
1592         '70.0.3525.1',
1593         '70.0.3525.0',
1594         '69.0.3497.45',
1595         '68.0.3440.116',
1596         '70.0.3524.4',
1597         '70.0.3524.3',
1598         '69.0.3497.44',
1599         '70.0.3524.2',
1600         '70.0.3524.1',
1601         '70.0.3524.0',
1602         '70.0.3523.2',
1603         '69.0.3497.43',
1604         '68.0.3440.115',
1605         '70.0.3505.9',
1606         '69.0.3497.42',
1607         '70.0.3505.8',
1608         '70.0.3523.1',
1609         '70.0.3523.0',
1610         '69.0.3497.41',
1611         '68.0.3440.114',
1612         '70.0.3505.7',
1613         '69.0.3497.40',
1614         '70.0.3522.1',
1615         '70.0.3522.0',
1616         '70.0.3521.2',
1617         '69.0.3497.39',
1618         '68.0.3440.113',
1619         '70.0.3505.6',
1620         '70.0.3521.1',
1621         '70.0.3521.0',
1622         '69.0.3497.38',
1623         '68.0.3440.112',
1624         '70.0.3520.1',
1625         '70.0.3520.0',
1626         '69.0.3497.37',
1627         '68.0.3440.111',
1628         '70.0.3519.3',
1629         '70.0.3519.2',
1630         '70.0.3519.1',
1631         '70.0.3519.0',
1632         '69.0.3497.36',
1633         '68.0.3440.110',
1634         '70.0.3518.1',
1635         '70.0.3518.0',
1636         '69.0.3497.35',
1637         '69.0.3497.34',
1638         '68.0.3440.109',
1639         '70.0.3517.1',
1640         '70.0.3517.0',
1641         '69.0.3497.33',
1642         '68.0.3440.108',
1643         '69.0.3497.32',
1644         '70.0.3516.3',
1645         '70.0.3516.2',
1646         '70.0.3516.1',
1647         '70.0.3516.0',
1648         '69.0.3497.31',
1649         '68.0.3440.107',
1650         '70.0.3515.4',
1651         '68.0.3440.106',
1652         '70.0.3515.3',
1653         '70.0.3515.2',
1654         '70.0.3515.1',
1655         '70.0.3515.0',
1656         '69.0.3497.30',
1657         '68.0.3440.105',
1658         '68.0.3440.104',
1659         '70.0.3514.2',
1660         '70.0.3514.1',
1661         '70.0.3514.0',
1662         '69.0.3497.29',
1663         '68.0.3440.103',
1664         '70.0.3513.1',
1665         '70.0.3513.0',
1666         '69.0.3497.28',
1667     )
1668     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
1671 std_headers = {
1672     'User-Agent': random_user_agent(),
1673     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1674     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1675     'Accept-Encoding': 'gzip, deflate',
1676     'Accept-Language': 'en-us,en;q=0.5',
1677 }
1678
1679
1680 USER_AGENTS = {
1681     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1682 }
1683
1684
1685 NO_DEFAULT = object()
1686
1687 ENGLISH_MONTH_NAMES = [
1688     'January', 'February', 'March', 'April', 'May', 'June',
1689     'July', 'August', 'September', 'October', 'November', 'December']
1690
1691 MONTH_NAMES = {
1692     'en': ENGLISH_MONTH_NAMES,
1693     'fr': [
1694         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1695         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1696 }
1697
1698 KNOWN_EXTENSIONS = (
1699     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1700     'flv', 'f4v', 'f4a', 'f4b',
1701     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1702     'mkv', 'mka', 'mk3d',
1703     'avi', 'divx',
1704     'mov',
1705     'asf', 'wmv', 'wma',
1706     '3gp', '3g2',
1707     'mp3',
1708     'flac',
1709     'ape',
1710     'wav',
1711     'f4f', 'f4m', 'm3u8', 'smil')
1712
1713 # needed for sanitizing filenames in restricted mode
1714 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1715                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1716                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1717
1718 DATE_FORMATS = (
1719     '%d %B %Y',
1720     '%d %b %Y',
1721     '%B %d %Y',
1722     '%B %dst %Y',
1723     '%B %dnd %Y',
1724     '%B %drd %Y',
1725     '%B %dth %Y',
1726     '%b %d %Y',
1727     '%b %dst %Y',
1728     '%b %dnd %Y',
1729     '%b %drd %Y',
1730     '%b %dth %Y',
1731     '%b %dst %Y %I:%M',
1732     '%b %dnd %Y %I:%M',
1733     '%b %drd %Y %I:%M',
1734     '%b %dth %Y %I:%M',
1735     '%Y %m %d',
1736     '%Y-%m-%d',
1737     '%Y/%m/%d',
1738     '%Y/%m/%d %H:%M',
1739     '%Y/%m/%d %H:%M:%S',
1740     '%Y-%m-%d %H:%M',
1741     '%Y-%m-%d %H:%M:%S',
1742     '%Y-%m-%d %H:%M:%S.%f',
1743     '%d.%m.%Y %H:%M',
1744     '%d.%m.%Y %H.%M',
1745     '%Y-%m-%dT%H:%M:%SZ',
1746     '%Y-%m-%dT%H:%M:%S.%fZ',
1747     '%Y-%m-%dT%H:%M:%S.%f0Z',
1748     '%Y-%m-%dT%H:%M:%S',
1749     '%Y-%m-%dT%H:%M:%S.%f',
1750     '%Y-%m-%dT%H:%M',
1751     '%b %d %Y at %H:%M',
1752     '%b %d %Y at %H:%M:%S',
1753     '%B %d %Y at %H:%M',
1754     '%B %d %Y at %H:%M:%S',
1755 )
1756
1757 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1758 DATE_FORMATS_DAY_FIRST.extend([
1759     '%d-%m-%Y',
1760     '%d.%m.%Y',
1761     '%d.%m.%y',
1762     '%d/%m/%Y',
1763     '%d/%m/%y',
1764     '%d/%m/%Y %H:%M:%S',
1765 ])
1766
1767 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1768 DATE_FORMATS_MONTH_FIRST.extend([
1769     '%m-%d-%Y',
1770     '%m.%d.%Y',
1771     '%m/%d/%Y',
1772     '%m/%d/%y',
1773     '%m/%d/%Y %H:%M:%S',
1774 ])
1775
1776 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1777 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1778
1779
1780 def preferredencoding():
1781     """Get preferred encoding.
1782
1783     Returns the best encoding scheme for the system, based on
1784     locale.getpreferredencoding() and some further tweaks.
1785     """
1786     try:
1787         pref = locale.getpreferredencoding()
1788         'TEST'.encode(pref)
1789     except Exception:
1790         pref = 'UTF-8'
1791
1792     return pref
1793
1794
1795 def write_json_file(obj, fn):
1796     """ Encode obj as JSON and write it to fn, atomically if possible """
1797
1798     fn = encodeFilename(fn)
1799     if sys.version_info < (3, 0) and sys.platform != 'win32':
1800         encoding = get_filesystem_encoding()
1801         # os.path.basename returns a bytes object, but NamedTemporaryFile
1802         # will fail if the filename contains non ascii characters unless we
1803         # use a unicode object
1804         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1805         # the same for os.path.dirname
1806         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1807     else:
1808         path_basename = os.path.basename
1809         path_dirname = os.path.dirname
1810
1811     args = {
1812         'suffix': '.tmp',
1813         'prefix': path_basename(fn) + '.',
1814         'dir': path_dirname(fn),
1815         'delete': False,
1816     }
1817
1818     # In Python 2.x, json.dump expects a bytestream.
1819     # In Python 3.x, it writes to a character stream
1820     if sys.version_info < (3, 0):
1821         args['mode'] = 'wb'
1822     else:
1823         args.update({
1824             'mode': 'w',
1825             'encoding': 'utf-8',
1826         })
1827
1828     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1829
1830     try:
1831         with tf:
1832             json.dump(obj, tf)
1833         if sys.platform == 'win32':
1834             # Need to remove existing file on Windows, else os.rename raises
1835             # WindowsError or FileExistsError.
1836             try:
1837                 os.unlink(fn)
1838             except OSError:
1839                 pass
1840         try:
1841             mask = os.umask(0)
1842             os.umask(mask)
1843             os.chmod(tf.name, 0o666 & ~mask)
1844         except OSError:
1845             pass
1846         os.rename(tf.name, fn)
1847     except Exception:
1848         try:
1849             os.remove(tf.name)
1850         except OSError:
1851             pass
1852         raise
1853
1854
1855 if sys.version_info >= (2, 7):
1856     def find_xpath_attr(node, xpath, key, val=None):
1857         """ Find the xpath xpath[@key=val] """
1858         assert re.match(r'^[a-zA-Z_-]+$', key)
1859         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1860         return node.find(expr)
1861 else:
1862     def find_xpath_attr(node, xpath, key, val=None):
1863         for f in node.findall(compat_xpath(xpath)):
1864             if key not in f.attrib:
1865                 continue
1866             if val is None or f.attrib.get(key) == val:
1867                 return f
1868         return None
1869
1870 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1871 # the namespace parameter
1872
1873
1874 def xpath_with_ns(path, ns_map):
1875     components = [c.split(':') for c in path.split('/')]
1876     replaced = []
1877     for c in components:
1878         if len(c) == 1:
1879             replaced.append(c[0])
1880         else:
1881             ns, tag = c
1882             replaced.append('{%s}%s' % (ns_map[ns], tag))
1883     return '/'.join(replaced)
1884
1885
1886 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1887     def _find_xpath(xpath):
1888         return node.find(compat_xpath(xpath))
1889
1890     if isinstance(xpath, (str, compat_str)):
1891         n = _find_xpath(xpath)
1892     else:
1893         for xp in xpath:
1894             n = _find_xpath(xp)
1895             if n is not None:
1896                 break
1897
1898     if n is None:
1899         if default is not NO_DEFAULT:
1900             return default
1901         elif fatal:
1902             name = xpath if name is None else name
1903             raise ExtractorError('Could not find XML element %s' % name)
1904         else:
1905             return None
1906     return n
1907
1908
1909 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1910     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1911     if n is None or n == default:
1912         return n
1913     if n.text is None:
1914         if default is not NO_DEFAULT:
1915             return default
1916         elif fatal:
1917             name = xpath if name is None else name
1918             raise ExtractorError('Could not find XML element\'s text %s' % name)
1919         else:
1920             return None
1921     return n.text
1922
1923
1924 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1925     n = find_xpath_attr(node, xpath, key)
1926     if n is None:
1927         if default is not NO_DEFAULT:
1928             return default
1929         elif fatal:
1930             name = '%s[@%s]' % (xpath, key) if name is None else name
1931             raise ExtractorError('Could not find XML attribute %s' % name)
1932         else:
1933             return None
1934     return n.attrib[key]
1935
1936
1937 def get_element_by_id(id, html):
1938     """Return the content of the tag with the specified ID in the passed HTML document"""
1939     return get_element_by_attribute('id', id, html)
1940
1941
1942 def get_element_by_class(class_name, html):
1943     """Return the content of the first tag with the specified class in the passed HTML document"""
1944     retval = get_elements_by_class(class_name, html)
1945     return retval[0] if retval else None
1946
1947
1948 def get_element_by_attribute(attribute, value, html, escape_value=True):
1949     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1950     return retval[0] if retval else None
1951
1952
1953 def get_elements_by_class(class_name, html):
1954     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1955     return get_elements_by_attribute(
1956         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1957         html, escape_value=False)
1958
1959
1960 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1961     """Return the content of the tag with the specified attribute in the passed HTML document"""
1962
1963     value = re.escape(value) if escape_value else value
1964
1965     retlist = []
1966     for m in re.finditer(r'''(?xs)
1967         <([a-zA-Z0-9:._-]+)
1968          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1969          \s+%s=['"]?%s['"]?
1970          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1971         \s*>
1972         (?P<content>.*?)
1973         </\1>
1974     ''' % (re.escape(attribute), value), html):
1975         res = m.group('content')
1976
1977         if res.startswith('"') or res.startswith("'"):
1978             res = res[1:-1]
1979
1980         retlist.append(unescapeHTML(res))
1981
1982     return retlist
1983
1984
1985 class HTMLAttributeParser(compat_HTMLParser):
1986     """Trivial HTML parser to gather the attributes for a single element"""
1987     def __init__(self):
1988         self.attrs = {}
1989         compat_HTMLParser.__init__(self)
1990
1991     def handle_starttag(self, tag, attrs):
1992         self.attrs = dict(attrs)
1993
1994
1995 def extract_attributes(html_element):
1996     """Given a string for an HTML element such as
1997     <el
1998          a="foo" B="bar" c="&98;az" d=boz
1999          empty= noval entity="&amp;"
2000          sq='"' dq="'"
2001     >
2002     Decode and return a dictionary of attributes.
2003     {
2004         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2005         'empty': '', 'noval': None, 'entity': '&',
2006         'sq': '"', 'dq': '\''
2007     }.
2008     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2009     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2010     """
2011     parser = HTMLAttributeParser()
2012     try:
2013         parser.feed(html_element)
2014         parser.close()
2015     # Older Python may throw HTMLParseError in case of malformed HTML
2016     except compat_HTMLParseError:
2017         pass
2018     return parser.attrs
2019
2020
2021 def clean_html(html):
2022     """Clean an HTML snippet into a readable string"""
2023
2024     if html is None:  # Convenience for sanitizing descriptions etc.
2025         return html
2026
2027     # Newline vs <br />
2028     html = html.replace('\n', ' ')
2029     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2030     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2031     # Strip html tags
2032     html = re.sub('<.*?>', '', html)
2033     # Replace html entities
2034     html = unescapeHTML(html)
2035     return html.strip()
2036
2037
2038 def sanitize_open(filename, open_mode):
2039     """Try to open the given filename, and slightly tweak it if this fails.
2040
2041     Attempts to open the given filename. If this fails, it tries to change
2042     the filename slightly, step by step, until it's either able to open it
2043     or it fails and raises a final exception, like the standard open()
2044     function.
2045
2046     It returns the tuple (stream, definitive_file_name).
2047     """
2048     try:
2049         if filename == '-':
2050             if sys.platform == 'win32':
2051                 import msvcrt
2052                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2053             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2054         stream = open(encodeFilename(filename), open_mode)
2055         return (stream, filename)
2056     except (IOError, OSError) as err:
2057         if err.errno in (errno.EACCES,):
2058             raise
2059
2060         # In case of error, try to remove win32 forbidden chars
2061         alt_filename = sanitize_path(filename)
2062         if alt_filename == filename:
2063             raise
2064         else:
2065             # An exception here should be caught in the caller
2066             stream = open(encodeFilename(alt_filename), open_mode)
2067             return (stream, alt_filename)
2068
2069
2070 def timeconvert(timestr):
2071     """Convert RFC 2822 defined time string into system timestamp"""
2072     timestamp = None
2073     timetuple = email.utils.parsedate_tz(timestr)
2074     if timetuple is not None:
2075         timestamp = email.utils.mktime_tz(timetuple)
2076     return timestamp
2077
2078
2079 def sanitize_filename(s, restricted=False, is_id=False):
2080     """Sanitizes a string so it could be used as part of a filename.
2081     If restricted is set, use a stricter subset of allowed characters.
2082     Set is_id if this is not an arbitrary string, but an ID that should be kept
2083     if possible.
2084     """
2085     def replace_insane(char):
2086         if restricted and char in ACCENT_CHARS:
2087             return ACCENT_CHARS[char]
2088         if char == '?' or ord(char) < 32 or ord(char) == 127:
2089             return ''
2090         elif char == '"':
2091             return '' if restricted else '\''
2092         elif char == ':':
2093             return '_-' if restricted else ' -'
2094         elif char in '\\/|*<>':
2095             return '_'
2096         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2097             return '_'
2098         if restricted and ord(char) > 127:
2099             return '_'
2100         return char
2101
2102     # Handle timestamps
2103     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2104     result = ''.join(map(replace_insane, s))
2105     if not is_id:
2106         while '__' in result:
2107             result = result.replace('__', '_')
2108         result = result.strip('_')
2109         # Common case of "Foreign band name - English song title"
2110         if restricted and result.startswith('-_'):
2111             result = result[2:]
2112         if result.startswith('-'):
2113             result = '_' + result[len('-'):]
2114         result = result.lstrip('.')
2115         if not result:
2116             result = '_'
2117     return result
2118
2119
2120 def sanitize_path(s):
2121     """Sanitizes and normalizes path on Windows"""
2122     if sys.platform != 'win32':
2123         return s
2124     drive_or_unc, _ = os.path.splitdrive(s)
2125     if sys.version_info < (2, 7) and not drive_or_unc:
2126         drive_or_unc, _ = os.path.splitunc(s)
2127     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2128     if drive_or_unc:
2129         norm_path.pop(0)
2130     sanitized_path = [
2131         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2132         for path_part in norm_path]
2133     if drive_or_unc:
2134         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2135     return os.path.join(*sanitized_path)
2136
2137
2138 def sanitize_url(url):
2139     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2140     # the number of unwanted failures due to missing protocol
2141     if url.startswith('//'):
2142         return 'http:%s' % url
2143     # Fix some common typos seen so far
2144     COMMON_TYPOS = (
2145         # https://github.com/ytdl-org/youtube-dl/issues/15649
2146         (r'^httpss://', r'https://'),
2147         # https://bx1.be/lives/direct-tv/
2148         (r'^rmtp([es]?)://', r'rtmp\1://'),
2149     )
2150     for mistake, fixup in COMMON_TYPOS:
2151         if re.match(mistake, url):
2152             return re.sub(mistake, fixup, url)
2153     return url
2154
2155
2156 def sanitized_Request(url, *args, **kwargs):
2157     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2158
2159
2160 def expand_path(s):
2161     """Expand shell variables and ~"""
2162     return os.path.expandvars(compat_expanduser(s))
2163
2164
2165 def orderedSet(iterable):
2166     """ Remove all duplicates from the input iterable """
2167     res = []
2168     for el in iterable:
2169         if el not in res:
2170             res.append(el)
2171     return res
2172
2173
2174 def _htmlentity_transform(entity_with_semicolon):
2175     """Transforms an HTML entity to a character."""
2176     entity = entity_with_semicolon[:-1]
2177
2178     # Known non-numeric HTML entity
2179     if entity in compat_html_entities.name2codepoint:
2180         return compat_chr(compat_html_entities.name2codepoint[entity])
2181
2182     # TODO: HTML5 allows entities without a semicolon. For example,
2183     # '&Eacuteric' should be decoded as 'Éric'.
2184     if entity_with_semicolon in compat_html_entities_html5:
2185         return compat_html_entities_html5[entity_with_semicolon]
2186
2187     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2188     if mobj is not None:
2189         numstr = mobj.group(1)
2190         if numstr.startswith('x'):
2191             base = 16
2192             numstr = '0%s' % numstr
2193         else:
2194             base = 10
2195         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2196         try:
2197             return compat_chr(int(numstr, base))
2198         except ValueError:
2199             pass
2200
2201     # Unknown entity in name, return its literal representation
2202     return '&%s;' % entity
2203
2204
2205 def unescapeHTML(s):
2206     if s is None:
2207         return None
2208     assert type(s) == compat_str
2209
2210     return re.sub(
2211         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2212
2213
2214 def get_subprocess_encoding():
2215     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2216         # For subprocess calls, encode with locale encoding
2217         # Refer to http://stackoverflow.com/a/9951851/35070
2218         encoding = preferredencoding()
2219     else:
2220         encoding = sys.getfilesystemencoding()
2221     if encoding is None:
2222         encoding = 'utf-8'
2223     return encoding
2224
2225
2226 def encodeFilename(s, for_subprocess=False):
2227     """
2228     @param s The name of the file
2229     """
2230
2231     assert type(s) == compat_str
2232
2233     # Python 3 has a Unicode API
2234     if sys.version_info >= (3, 0):
2235         return s
2236
2237     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2238     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2239     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2240     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2241         return s
2242
2243     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2244     if sys.platform.startswith('java'):
2245         return s
2246
2247     return s.encode(get_subprocess_encoding(), 'ignore')
2248
2249
2250 def decodeFilename(b, for_subprocess=False):
2251
2252     if sys.version_info >= (3, 0):
2253         return b
2254
2255     if not isinstance(b, bytes):
2256         return b
2257
2258     return b.decode(get_subprocess_encoding(), 'ignore')
2259
2260
2261 def encodeArgument(s):
2262     if not isinstance(s, compat_str):
2263         # Legacy code that uses byte strings
2264         # Uncomment the following line after fixing all post processors
2265         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2266         s = s.decode('ascii')
2267     return encodeFilename(s, True)
2268
2269
2270 def decodeArgument(b):
2271     return decodeFilename(b, True)
2272
2273
2274 def decodeOption(optval):
2275     if optval is None:
2276         return optval
2277     if isinstance(optval, bytes):
2278         optval = optval.decode(preferredencoding())
2279
2280     assert isinstance(optval, compat_str)
2281     return optval
2282
2283
2284 def formatSeconds(secs):
2285     if secs > 3600:
2286         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
2287     elif secs > 60:
2288         return '%d:%02d' % (secs // 60, secs % 60)
2289     else:
2290         return '%d' % secs
2291
2292
2293 def make_HTTPS_handler(params, **kwargs):
2294     opts_no_check_certificate = params.get('nocheckcertificate', False)
2295     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2296         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2297         if opts_no_check_certificate:
2298             context.check_hostname = False
2299             context.verify_mode = ssl.CERT_NONE
2300         try:
2301             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2302         except TypeError:
2303             # Python 2.7.8
2304             # (create_default_context present but HTTPSHandler has no context=)
2305             pass
2306
2307     if sys.version_info < (3, 2):
2308         return YoutubeDLHTTPSHandler(params, **kwargs)
2309     else:  # Python < 3.4
2310         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2311         context.verify_mode = (ssl.CERT_NONE
2312                                if opts_no_check_certificate
2313                                else ssl.CERT_REQUIRED)
2314         context.set_default_verify_paths()
2315         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2316
2317
2318 def bug_reports_message():
2319     if ytdl_is_updateable():
2320         update_cmd = 'type  youtube-dl -U  to update'
2321     else:
2322         update_cmd = 'see  https://yt-dl.org/update  on how to update'
2323     msg = '; please report this issue on https://yt-dl.org/bug .'
2324     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2325     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
2326     return msg
2327
2328
2329 class YoutubeDLError(Exception):
2330     """Base exception for YoutubeDL errors."""
2331     pass
2332
2333
2334 class ExtractorError(YoutubeDLError):
2335     """Error during info extraction."""
2336
2337     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2338         """ tb, if given, is the original traceback (so that it can be printed out).
2339         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
2340         """
2341
2342         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2343             expected = True
2344         if video_id is not None:
2345             msg = video_id + ': ' + msg
2346         if cause:
2347             msg += ' (caused by %r)' % cause
2348         if not expected:
2349             msg += bug_reports_message()
2350         super(ExtractorError, self).__init__(msg)
2351
2352         self.traceback = tb
2353         self.exc_info = sys.exc_info()  # preserve original exception
2354         self.cause = cause
2355         self.video_id = video_id
2356
2357     def format_traceback(self):
2358         if self.traceback is None:
2359             return None
2360         return ''.join(traceback.format_tb(self.traceback))
2361
2362
2363 class UnsupportedError(ExtractorError):
2364     def __init__(self, url):
2365         super(UnsupportedError, self).__init__(
2366             'Unsupported URL: %s' % url, expected=True)
2367         self.url = url
2368
2369
2370 class RegexNotFoundError(ExtractorError):
2371     """Error when a regex didn't match"""
2372     pass
2373
2374
2375 class GeoRestrictedError(ExtractorError):
2376     """Geographic restriction Error exception.
2377
2378     This exception may be thrown when a video is not available from your
2379     geographic location due to geographic restrictions imposed by a website.
2380     """
2381     def __init__(self, msg, countries=None):
2382         super(GeoRestrictedError, self).__init__(msg, expected=True)
2383         self.msg = msg
2384         self.countries = countries
2385
2386
2387 class DownloadError(YoutubeDLError):
2388     """Download Error exception.
2389
2390     This exception may be thrown by FileDownloader objects if they are not
2391     configured to continue on errors. They will contain the appropriate
2392     error message.
2393     """
2394
2395     def __init__(self, msg, exc_info=None):
2396         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2397         super(DownloadError, self).__init__(msg)
2398         self.exc_info = exc_info
2399
2400
2401 class SameFileError(YoutubeDLError):
2402     """Same File exception.
2403
2404     This exception will be thrown by FileDownloader objects if they detect
2405     multiple files would have to be downloaded to the same file on disk.
2406     """
2407     pass
2408
2409
2410 class PostProcessingError(YoutubeDLError):
2411     """Post Processing exception.
2412
2413     This exception may be raised by PostProcessor's .run() method to
2414     indicate an error in the postprocessing task.
2415     """
2416
2417     def __init__(self, msg):
2418         super(PostProcessingError, self).__init__(msg)
2419         self.msg = msg
2420
2421
2422 class MaxDownloadsReached(YoutubeDLError):
2423     """ --max-downloads limit has been reached. """
2424     pass
2425
2426
2427 class UnavailableVideoError(YoutubeDLError):
2428     """Unavailable Format exception.
2429
2430     This exception will be thrown when a video is requested
2431     in a format that is not available for that video.
2432     """
2433     pass
2434
2435
2436 class ContentTooShortError(YoutubeDLError):
2437     """Content Too Short exception.
2438
2439     This exception may be raised by FileDownloader objects when a file they
2440     download is too small for what the server announced first, indicating
2441     the connection was probably interrupted.
2442     """
2443
2444     def __init__(self, downloaded, expected):
2445         super(ContentTooShortError, self).__init__(
2446             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2447         )
2448         # Both in bytes
2449         self.downloaded = downloaded
2450         self.expected = expected
2451
2452
2453 class XAttrMetadataError(YoutubeDLError):
2454     def __init__(self, code=None, msg='Unknown error'):
2455         super(XAttrMetadataError, self).__init__(msg)
2456         self.code = code
2457         self.msg = msg
2458
2459         # Parsing code and msg
2460         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2461                 or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
2462             self.reason = 'NO_SPACE'
2463         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2464             self.reason = 'VALUE_TOO_LONG'
2465         else:
2466             self.reason = 'NOT_SUPPORTED'
2467
2468
2469 class XAttrUnavailableError(YoutubeDLError):
2470     pass
2471
2472
2473 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2474     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2475     # expected HTTP responses to meet HTTP/1.0 or later (see also
2476     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2477     if sys.version_info < (3, 0):
2478         kwargs['strict'] = True
2479     hc = http_class(*args, **compat_kwargs(kwargs))
2480     source_address = ydl_handler._params.get('source_address')
2481
2482     if source_address is not None:
2483         # This is to workaround _create_connection() from socket where it will try all
2484         # address data from getaddrinfo() including IPv6. This filters the result from
2485         # getaddrinfo() based on the source_address value.
2486         # This is based on the cpython socket.create_connection() function.
2487         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2488         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2489             host, port = address
2490             err = None
2491             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2492             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2493             ip_addrs = [addr for addr in addrs if addr[0] == af]
2494             if addrs and not ip_addrs:
2495                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2496                 raise socket.error(
2497                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2498                     % (ip_version, source_address[0]))
2499             for res in ip_addrs:
2500                 af, socktype, proto, canonname, sa = res
2501                 sock = None
2502                 try:
2503                     sock = socket.socket(af, socktype, proto)
2504                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2505                         sock.settimeout(timeout)
2506                     sock.bind(source_address)
2507                     sock.connect(sa)
2508                     err = None  # Explicitly break reference cycle
2509                     return sock
2510                 except socket.error as _:
2511                     err = _
2512                     if sock is not None:
2513                         sock.close()
2514             if err is not None:
2515                 raise err
2516             else:
2517                 raise socket.error('getaddrinfo returns an empty list')
2518         if hasattr(hc, '_create_connection'):
2519             hc._create_connection = _create_connection
2520         sa = (source_address, 0)
2521         if hasattr(hc, 'source_address'):  # Python 2.7+
2522             hc.source_address = sa
2523         else:  # Python 2.6
2524             def _hc_connect(self, *args, **kwargs):
2525                 sock = _create_connection(
2526                     (self.host, self.port), self.timeout, sa)
2527                 if is_https:
2528                     self.sock = ssl.wrap_socket(
2529                         sock, self.key_file, self.cert_file,
2530                         ssl_version=ssl.PROTOCOL_TLSv1)
2531                 else:
2532                     self.sock = sock
2533             hc.connect = functools.partial(_hc_connect, hc)
2534
2535     return hc
2536
2537
2538 def handle_youtubedl_headers(headers):
2539     filtered_headers = headers
2540
2541     if 'Youtubedl-no-compression' in filtered_headers:
2542         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2543         del filtered_headers['Youtubedl-no-compression']
2544
2545     return filtered_headers
2546
2547
2548 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2549     """Handler for HTTP requests and responses.
2550
2551     This class, when installed with an OpenerDirector, automatically adds
2552     the standard headers to every HTTP request and handles gzipped and
2553     deflated responses from web servers. If compression is to be avoided in
2554     a particular request, the original request in the program code only has
2555     to include the HTTP header "Youtubedl-no-compression", which will be
2556     removed before making the real request.
2557
2558     Part of this code was copied from:
2559
2560     http://techknack.net/python-urllib2-handlers/
2561
2562     Andrew Rowls, the author of that code, agreed to release it to the
2563     public domain.
2564     """
2565
2566     def __init__(self, params, *args, **kwargs):
2567         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2568         self._params = params
2569
2570     def http_open(self, req):
2571         conn_class = compat_http_client.HTTPConnection
2572
2573         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2574         if socks_proxy:
2575             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2576             del req.headers['Ytdl-socks-proxy']
2577
2578         return self.do_open(functools.partial(
2579             _create_http_connection, self, conn_class, False),
2580             req)
2581
2582     @staticmethod
2583     def deflate(data):
2584         try:
2585             return zlib.decompress(data, -zlib.MAX_WBITS)
2586         except zlib.error:
2587             return zlib.decompress(data)
2588
2589     def http_request(self, req):
2590         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2591         # always respected by websites, some tend to give out URLs with non percent-encoded
2592         # non-ASCII characters (see telemb.py, ard.py [#3412])
2593         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2594         # To work around aforementioned issue we will replace request's original URL with
2595         # percent-encoded one
2596         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2597         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2598         url = req.get_full_url()
2599         url_escaped = escape_url(url)
2600
2601         # Substitute URL if any change after escaping
2602         if url != url_escaped:
2603             req = update_Request(req, url=url_escaped)
2604
2605         for h, v in std_headers.items():
2606             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2607             # The dict keys are capitalized because of this bug by urllib
2608             if h.capitalize() not in req.headers:
2609                 req.add_header(h, v)
2610
2611         req.headers = handle_youtubedl_headers(req.headers)
2612
2613         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2614             # Python 2.6 is brain-dead when it comes to fragments
2615             req._Request__original = req._Request__original.partition('#')[0]
2616             req._Request__r_type = req._Request__r_type.partition('#')[0]
2617
2618         return req
2619
2620     def http_response(self, req, resp):
2621         old_resp = resp
2622         # gzip
2623         if resp.headers.get('Content-encoding', '') == 'gzip':
2624             content = resp.read()
2625             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2626             try:
2627                 uncompressed = io.BytesIO(gz.read())
2628             except IOError as original_ioerror:
2629                 # There may be junk add the end of the file
2630                 # See http://stackoverflow.com/q/4928560/35070 for details
2631                 for i in range(1, 1024):
2632                     try:
2633                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2634                         uncompressed = io.BytesIO(gz.read())
2635                     except IOError:
2636                         continue
2637                     break
2638                 else:
2639                     raise original_ioerror
2640             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2641             resp.msg = old_resp.msg
2642             del resp.headers['Content-encoding']
2643         # deflate
2644         if resp.headers.get('Content-encoding', '') == 'deflate':
2645             gz = io.BytesIO(self.deflate(resp.read()))
2646             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2647             resp.msg = old_resp.msg
2648             del resp.headers['Content-encoding']
2649         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2650         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2651         if 300 <= resp.code < 400:
2652             location = resp.headers.get('Location')
2653             if location:
2654                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2655                 if sys.version_info >= (3, 0):
2656                     location = location.encode('iso-8859-1').decode('utf-8')
2657                 else:
2658                     location = location.decode('utf-8')
2659                 location_escaped = escape_url(location)
2660                 if location != location_escaped:
2661                     del resp.headers['Location']
2662                     if sys.version_info < (3, 0):
2663                         location_escaped = location_escaped.encode('utf-8')
2664                     resp.headers['Location'] = location_escaped
2665         return resp
2666
2667     https_request = http_request
2668     https_response = http_response
2669
2670
2671 def make_socks_conn_class(base_class, socks_proxy):
2672     assert issubclass(base_class, (
2673         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2674
2675     url_components = compat_urlparse.urlparse(socks_proxy)
2676     if url_components.scheme.lower() == 'socks5':
2677         socks_type = ProxyType.SOCKS5
2678     elif url_components.scheme.lower() in ('socks', 'socks4'):
2679         socks_type = ProxyType.SOCKS4
2680     elif url_components.scheme.lower() == 'socks4a':
2681         socks_type = ProxyType.SOCKS4A
2682
2683     def unquote_if_non_empty(s):
2684         if not s:
2685             return s
2686         return compat_urllib_parse_unquote_plus(s)
2687
2688     proxy_args = (
2689         socks_type,
2690         url_components.hostname, url_components.port or 1080,
2691         True,  # Remote DNS
2692         unquote_if_non_empty(url_components.username),
2693         unquote_if_non_empty(url_components.password),
2694     )
2695
2696     class SocksConnection(base_class):
2697         def connect(self):
2698             self.sock = sockssocket()
2699             self.sock.setproxy(*proxy_args)
2700             if type(self.timeout) in (int, float):
2701                 self.sock.settimeout(self.timeout)
2702             self.sock.connect((self.host, self.port))
2703
2704             if isinstance(self, compat_http_client.HTTPSConnection):
2705                 if hasattr(self, '_context'):  # Python > 2.6
2706                     self.sock = self._context.wrap_socket(
2707                         self.sock, server_hostname=self.host)
2708                 else:
2709                     self.sock = ssl.wrap_socket(self.sock)
2710
2711     return SocksConnection
2712
2713
2714 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2715     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2716         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2717         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2718         self._params = params
2719
2720     def https_open(self, req):
2721         kwargs = {}
2722         conn_class = self._https_conn_class
2723
2724         if hasattr(self, '_context'):  # python > 2.6
2725             kwargs['context'] = self._context
2726         if hasattr(self, '_check_hostname'):  # python 3.x
2727             kwargs['check_hostname'] = self._check_hostname
2728
2729         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2730         if socks_proxy:
2731             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2732             del req.headers['Ytdl-socks-proxy']
2733
2734         return self.do_open(functools.partial(
2735             _create_http_connection, self, conn_class, True),
2736             req, **kwargs)
2737
2738
2739 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2740     """
2741     See [1] for cookie file format.
2742
2743     1. https://curl.haxx.se/docs/http-cookies.html
2744     """
2745     _HTTPONLY_PREFIX = '#HttpOnly_'
2746     _ENTRY_LEN = 7
2747     _HEADER = '''# Netscape HTTP Cookie File
2748 # This file is generated by youtube-dl.  Do not edit.
2749
2750 '''
2751     _CookieFileEntry = collections.namedtuple(
2752         'CookieFileEntry',
2753         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2754
2755     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2756         """
2757         Save cookies to a file.
2758
2759         Most of the code is taken from CPython 3.8 and slightly adapted
2760         to support cookie files with UTF-8 in both python 2 and 3.
2761         """
2762         if filename is None:
2763             if self.filename is not None:
2764                 filename = self.filename
2765             else:
2766                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2767
2768         # Store session cookies with `expires` set to 0 instead of an empty
2769         # string
2770         for cookie in self:
2771             if cookie.expires is None:
2772                 cookie.expires = 0
2773
2774         with io.open(filename, 'w', encoding='utf-8') as f:
2775             f.write(self._HEADER)
2776             now = time.time()
2777             for cookie in self:
2778                 if not ignore_discard and cookie.discard:
2779                     continue
2780                 if not ignore_expires and cookie.is_expired(now):
2781                     continue
2782                 if cookie.secure:
2783                     secure = 'TRUE'
2784                 else:
2785                     secure = 'FALSE'
2786                 if cookie.domain.startswith('.'):
2787                     initial_dot = 'TRUE'
2788                 else:
2789                     initial_dot = 'FALSE'
2790                 if cookie.expires is not None:
2791                     expires = compat_str(cookie.expires)
2792                 else:
2793                     expires = ''
2794                 if cookie.value is None:
2795                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
2796                     # with no name, whereas http.cookiejar regards it as a
2797                     # cookie with no value.
2798                     name = ''
2799                     value = cookie.name
2800                 else:
2801                     name = cookie.name
2802                     value = cookie.value
2803                 f.write(
2804                     '\t'.join([cookie.domain, initial_dot, cookie.path,
2805                                secure, expires, name, value]) + '\n')
2806
2807     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2808         """Load cookies from a file."""
2809         if filename is None:
2810             if self.filename is not None:
2811                 filename = self.filename
2812             else:
2813                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2814
2815         def prepare_line(line):
2816             if line.startswith(self._HTTPONLY_PREFIX):
2817                 line = line[len(self._HTTPONLY_PREFIX):]
2818             # comments and empty lines are fine
2819             if line.startswith('#') or not line.strip():
2820                 return line
2821             cookie_list = line.split('\t')
2822             if len(cookie_list) != self._ENTRY_LEN:
2823                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2824             cookie = self._CookieFileEntry(*cookie_list)
2825             if cookie.expires_at and not cookie.expires_at.isdigit():
2826                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2827             return line
2828
2829         cf = io.StringIO()
2830         with io.open(filename, encoding='utf-8') as f:
2831             for line in f:
2832                 try:
2833                     cf.write(prepare_line(line))
2834                 except compat_cookiejar.LoadError as e:
2835                     write_string(
2836                         'WARNING: skipping cookie file entry due to %s: %r\n'
2837                         % (e, line), sys.stderr)
2838                     continue
2839         cf.seek(0)
2840         self._really_load(cf, filename, ignore_discard, ignore_expires)
2841         # Session cookies are denoted by either `expires` field set to
2842         # an empty string or 0. MozillaCookieJar only recognizes the former
2843         # (see [1]). So we need force the latter to be recognized as session
2844         # cookies on our own.
2845         # Session cookies may be important for cookies-based authentication,
2846         # e.g. usually, when user does not check 'Remember me' check box while
2847         # logging in on a site, some important cookies are stored as session
2848         # cookies so that not recognizing them will result in failed login.
2849         # 1. https://bugs.python.org/issue17164
2850         for cookie in self:
2851             # Treat `expires=0` cookies as session cookies
2852             if cookie.expires == 0:
2853                 cookie.expires = None
2854                 cookie.discard = True
2855
2856
2857 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2858     def __init__(self, cookiejar=None):
2859         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2860
2861     def http_response(self, request, response):
2862         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2863         # characters in Set-Cookie HTTP header of last response (see
2864         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2865         # In order to at least prevent crashing we will percent encode Set-Cookie
2866         # header before HTTPCookieProcessor starts processing it.
2867         # if sys.version_info < (3, 0) and response.headers:
2868         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2869         #         set_cookie = response.headers.get(set_cookie_header)
2870         #         if set_cookie:
2871         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2872         #             if set_cookie != set_cookie_escaped:
2873         #                 del response.headers[set_cookie_header]
2874         #                 response.headers[set_cookie_header] = set_cookie_escaped
2875         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2876
2877     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2878     https_response = http_response
2879
2880
2881 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
2882     if sys.version_info[0] < 3:
2883         def redirect_request(self, req, fp, code, msg, headers, newurl):
2884             # On python 2 urlh.geturl() may sometimes return redirect URL
2885             # as byte string instead of unicode. This workaround allows
2886             # to force it always return unicode.
2887             return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2888
2889
2890 def extract_timezone(date_str):
2891     m = re.search(
2892         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2893         date_str)
2894     if not m:
2895         timezone = datetime.timedelta()
2896     else:
2897         date_str = date_str[:-len(m.group('tz'))]
2898         if not m.group('sign'):
2899             timezone = datetime.timedelta()
2900         else:
2901             sign = 1 if m.group('sign') == '+' else -1
2902             timezone = datetime.timedelta(
2903                 hours=sign * int(m.group('hours')),
2904                 minutes=sign * int(m.group('minutes')))
2905     return timezone, date_str
2906
2907
2908 def parse_iso8601(date_str, delimiter='T', timezone=None):
2909     """ Return a UNIX timestamp from the given date """
2910
2911     if date_str is None:
2912         return None
2913
2914     date_str = re.sub(r'\.[0-9]+', '', date_str)
2915
2916     if timezone is None:
2917         timezone, date_str = extract_timezone(date_str)
2918
2919     try:
2920         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
2921         dt = datetime.datetime.strptime(date_str, date_format) - timezone
2922         return calendar.timegm(dt.timetuple())
2923     except ValueError:
2924         pass
2925
2926
2927 def date_formats(day_first=True):
2928     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
2929
2930
2931 def unified_strdate(date_str, day_first=True):
2932     """Return a string with the date in the format YYYYMMDD"""
2933
2934     if date_str is None:
2935         return None
2936     upload_date = None
2937     # Replace commas
2938     date_str = date_str.replace(',', ' ')
2939     # Remove AM/PM + timezone
2940     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2941     _, date_str = extract_timezone(date_str)
2942
2943     for expression in date_formats(day_first):
2944         try:
2945             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
2946         except ValueError:
2947             pass
2948     if upload_date is None:
2949         timetuple = email.utils.parsedate_tz(date_str)
2950         if timetuple:
2951             try:
2952                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
2953             except ValueError:
2954                 pass
2955     if upload_date is not None:
2956         return compat_str(upload_date)
2957
2958
2959 def unified_timestamp(date_str, day_first=True):
2960     if date_str is None:
2961         return None
2962
2963     date_str = re.sub(r'[,|]', '', date_str)
2964
2965     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
2966     timezone, date_str = extract_timezone(date_str)
2967
2968     # Remove AM/PM + timezone
2969     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2970
2971     # Remove unrecognized timezones from ISO 8601 alike timestamps
2972     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
2973     if m:
2974         date_str = date_str[:-len(m.group('tz'))]
2975
2976     # Python only supports microseconds, so remove nanoseconds
2977     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
2978     if m:
2979         date_str = m.group(1)
2980
2981     for expression in date_formats(day_first):
2982         try:
2983             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
2984             return calendar.timegm(dt.timetuple())
2985         except ValueError:
2986             pass
2987     timetuple = email.utils.parsedate_tz(date_str)
2988     if timetuple:
2989         return calendar.timegm(timetuple) + pm_delta * 3600
2990
2991
2992 def determine_ext(url, default_ext='unknown_video'):
2993     if url is None or '.' not in url:
2994         return default_ext
2995     guess = url.partition('?')[0].rpartition('.')[2]
2996     if re.match(r'^[A-Za-z0-9]+$', guess):
2997         return guess
2998     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
2999     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3000         return guess.rstrip('/')
3001     else:
3002         return default_ext
3003
3004
3005 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3006     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3007
3008
3009 def date_from_str(date_str):
3010     """
3011     Return a datetime object from a string in the format YYYYMMDD or
3012     (now|today)[+-][0-9](day|week|month|year)(s)?"""
3013     today = datetime.date.today()
3014     if date_str in ('now', 'today'):
3015         return today
3016     if date_str == 'yesterday':
3017         return today - datetime.timedelta(days=1)
3018     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
3019     if match is not None:
3020         sign = match.group('sign')
3021         time = int(match.group('time'))
3022         if sign == '-':
3023             time = -time
3024         unit = match.group('unit')
3025         # A bad approximation?
3026         if unit == 'month':
3027             unit = 'day'
3028             time *= 30
3029         elif unit == 'year':
3030             unit = 'day'
3031             time *= 365
3032         unit += 's'
3033         delta = datetime.timedelta(**{unit: time})
3034         return today + delta
3035     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
3036
3037
3038 def hyphenate_date(date_str):
3039     """
3040     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3041     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3042     if match is not None:
3043         return '-'.join(match.groups())
3044     else:
3045         return date_str
3046
3047
3048 class DateRange(object):
3049     """Represents a time interval between two dates"""
3050
3051     def __init__(self, start=None, end=None):
3052         """start and end must be strings in the format accepted by date"""
3053         if start is not None:
3054             self.start = date_from_str(start)
3055         else:
3056             self.start = datetime.datetime.min.date()
3057         if end is not None:
3058             self.end = date_from_str(end)
3059         else:
3060             self.end = datetime.datetime.max.date()
3061         if self.start > self.end:
3062             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
3063
3064     @classmethod
3065     def day(cls, day):
3066         """Returns a range that only contains the given day"""
3067         return cls(day, day)
3068
3069     def __contains__(self, date):
3070         """Check if the date is in the range"""
3071         if not isinstance(date, datetime.date):
3072             date = date_from_str(date)
3073         return self.start <= date <= self.end
3074
3075     def __str__(self):
3076         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3077
3078
3079 def platform_name():
3080     """ Returns the platform name as a compat_str """
3081     res = platform.platform()
3082     if isinstance(res, bytes):
3083         res = res.decode(preferredencoding())
3084
3085     assert isinstance(res, compat_str)
3086     return res
3087
3088
3089 def _windows_write_string(s, out):
3090     """ Returns True if the string was written using special methods,
3091     False if it has yet to be written out."""
3092     # Adapted from http://stackoverflow.com/a/3259271/35070
3093
3094     import ctypes
3095     import ctypes.wintypes
3096
3097     WIN_OUTPUT_IDS = {
3098         1: -11,
3099         2: -12,
3100     }
3101
3102     try:
3103         fileno = out.fileno()
3104     except AttributeError:
3105         # If the output stream doesn't have a fileno, it's virtual
3106         return False
3107     except io.UnsupportedOperation:
3108         # Some strange Windows pseudo files?
3109         return False
3110     if fileno not in WIN_OUTPUT_IDS:
3111         return False
3112
3113     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3114         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3115         ('GetStdHandle', ctypes.windll.kernel32))
3116     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3117
3118     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3119         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3120         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3121         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3122     written = ctypes.wintypes.DWORD(0)
3123
3124     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3125     FILE_TYPE_CHAR = 0x0002
3126     FILE_TYPE_REMOTE = 0x8000
3127     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3128         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3129         ctypes.POINTER(ctypes.wintypes.DWORD))(
3130         ('GetConsoleMode', ctypes.windll.kernel32))
3131     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3132
3133     def not_a_console(handle):
3134         if handle == INVALID_HANDLE_VALUE or handle is None:
3135             return True
3136         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3137                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3138
3139     if not_a_console(h):
3140         return False
3141
3142     def next_nonbmp_pos(s):
3143         try:
3144             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3145         except StopIteration:
3146             return len(s)
3147
3148     while s:
3149         count = min(next_nonbmp_pos(s), 1024)
3150
3151         ret = WriteConsoleW(
3152             h, s, count if count else 2, ctypes.byref(written), None)
3153         if ret == 0:
3154             raise OSError('Failed to write string')
3155         if not count:  # We just wrote a non-BMP character
3156             assert written.value == 2
3157             s = s[1:]
3158         else:
3159             assert written.value > 0
3160             s = s[written.value:]
3161     return True
3162
3163
3164 def write_string(s, out=None, encoding=None):
3165     if out is None:
3166         out = sys.stderr
3167     assert type(s) == compat_str
3168
3169     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3170         if _windows_write_string(s, out):
3171             return
3172
3173     if ('b' in getattr(out, 'mode', '')
3174             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3175         byt = s.encode(encoding or preferredencoding(), 'ignore')
3176         out.write(byt)
3177     elif hasattr(out, 'buffer'):
3178         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3179         byt = s.encode(enc, 'ignore')
3180         out.buffer.write(byt)
3181     else:
3182         out.write(s)
3183     out.flush()
3184
3185
3186 def bytes_to_intlist(bs):
3187     if not bs:
3188         return []
3189     if isinstance(bs[0], int):  # Python 3
3190         return list(bs)
3191     else:
3192         return [ord(c) for c in bs]
3193
3194
3195 def intlist_to_bytes(xs):
3196     if not xs:
3197         return b''
3198     return compat_struct_pack('%dB' % len(xs), *xs)
3199
3200
3201 # Cross-platform file locking
3202 if sys.platform == 'win32':
3203     import ctypes.wintypes
3204     import msvcrt
3205
3206     class OVERLAPPED(ctypes.Structure):
3207         _fields_ = [
3208             ('Internal', ctypes.wintypes.LPVOID),
3209             ('InternalHigh', ctypes.wintypes.LPVOID),
3210             ('Offset', ctypes.wintypes.DWORD),
3211             ('OffsetHigh', ctypes.wintypes.DWORD),
3212             ('hEvent', ctypes.wintypes.HANDLE),
3213         ]
3214
3215     kernel32 = ctypes.windll.kernel32
3216     LockFileEx = kernel32.LockFileEx
3217     LockFileEx.argtypes = [
3218         ctypes.wintypes.HANDLE,     # hFile
3219         ctypes.wintypes.DWORD,      # dwFlags
3220         ctypes.wintypes.DWORD,      # dwReserved
3221         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3222         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3223         ctypes.POINTER(OVERLAPPED)  # Overlapped
3224     ]
3225     LockFileEx.restype = ctypes.wintypes.BOOL
3226     UnlockFileEx = kernel32.UnlockFileEx
3227     UnlockFileEx.argtypes = [
3228         ctypes.wintypes.HANDLE,     # hFile
3229         ctypes.wintypes.DWORD,      # dwReserved
3230         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3231         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3232         ctypes.POINTER(OVERLAPPED)  # Overlapped
3233     ]
3234     UnlockFileEx.restype = ctypes.wintypes.BOOL
3235     whole_low = 0xffffffff
3236     whole_high = 0x7fffffff
3237
3238     def _lock_file(f, exclusive):
3239         overlapped = OVERLAPPED()
3240         overlapped.Offset = 0
3241         overlapped.OffsetHigh = 0
3242         overlapped.hEvent = 0
3243         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3244         handle = msvcrt.get_osfhandle(f.fileno())
3245         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3246                           whole_low, whole_high, f._lock_file_overlapped_p):
3247             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3248
3249     def _unlock_file(f):
3250         assert f._lock_file_overlapped_p
3251         handle = msvcrt.get_osfhandle(f.fileno())
3252         if not UnlockFileEx(handle, 0,
3253                             whole_low, whole_high, f._lock_file_overlapped_p):
3254             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3255
3256 else:
3257     # Some platforms, such as Jython, is missing fcntl
3258     try:
3259         import fcntl
3260
3261         def _lock_file(f, exclusive):
3262             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3263
3264         def _unlock_file(f):
3265             fcntl.flock(f, fcntl.LOCK_UN)
3266     except ImportError:
3267         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3268
3269         def _lock_file(f, exclusive):
3270             raise IOError(UNSUPPORTED_MSG)
3271
3272         def _unlock_file(f):
3273             raise IOError(UNSUPPORTED_MSG)
3274
3275
3276 class locked_file(object):
3277     def __init__(self, filename, mode, encoding=None):
3278         assert mode in ['r', 'a', 'w']
3279         self.f = io.open(filename, mode, encoding=encoding)
3280         self.mode = mode
3281
3282     def __enter__(self):
3283         exclusive = self.mode != 'r'
3284         try:
3285             _lock_file(self.f, exclusive)
3286         except IOError:
3287             self.f.close()
3288             raise
3289         return self
3290
3291     def __exit__(self, etype, value, traceback):
3292         try:
3293             _unlock_file(self.f)
3294         finally:
3295             self.f.close()
3296
3297     def __iter__(self):
3298         return iter(self.f)
3299
3300     def write(self, *args):
3301         return self.f.write(*args)
3302
3303     def read(self, *args):
3304         return self.f.read(*args)
3305
3306
3307 def get_filesystem_encoding():
3308     encoding = sys.getfilesystemencoding()
3309     return encoding if encoding is not None else 'utf-8'
3310
3311
3312 def shell_quote(args):
3313     quoted_args = []
3314     encoding = get_filesystem_encoding()
3315     for a in args:
3316         if isinstance(a, bytes):
3317             # We may get a filename encoded with 'encodeFilename'
3318             a = a.decode(encoding)
3319         quoted_args.append(compat_shlex_quote(a))
3320     return ' '.join(quoted_args)
3321
3322
3323 def smuggle_url(url, data):
3324     """ Pass additional data in a URL for internal use. """
3325
3326     url, idata = unsmuggle_url(url, {})
3327     data.update(idata)
3328     sdata = compat_urllib_parse_urlencode(
3329         {'__youtubedl_smuggle': json.dumps(data)})
3330     return url + '#' + sdata
3331
3332
3333 def unsmuggle_url(smug_url, default=None):
3334     if '#__youtubedl_smuggle' not in smug_url:
3335         return smug_url, default
3336     url, _, sdata = smug_url.rpartition('#')
3337     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3338     data = json.loads(jsond)
3339     return url, data
3340
3341
3342 def format_bytes(bytes):
3343     if bytes is None:
3344         return 'N/A'
3345     if type(bytes) is str:
3346         bytes = float(bytes)
3347     if bytes == 0.0:
3348         exponent = 0
3349     else:
3350         exponent = int(math.log(bytes, 1024.0))
3351     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3352     converted = float(bytes) / float(1024 ** exponent)
3353     return '%.2f%s' % (converted, suffix)
3354
3355
3356 def lookup_unit_table(unit_table, s):
3357     units_re = '|'.join(re.escape(u) for u in unit_table)
3358     m = re.match(
3359         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3360     if not m:
3361         return None
3362     num_str = m.group('num').replace(',', '.')
3363     mult = unit_table[m.group('unit')]
3364     return int(float(num_str) * mult)
3365
3366
3367 def parse_filesize(s):
3368     if s is None:
3369         return None
3370
3371     # The lower-case forms are of course incorrect and unofficial,
3372     # but we support those too
3373     _UNIT_TABLE = {
3374         'B': 1,
3375         'b': 1,
3376         'bytes': 1,
3377         'KiB': 1024,
3378         'KB': 1000,
3379         'kB': 1024,
3380         'Kb': 1000,
3381         'kb': 1000,
3382         'kilobytes': 1000,
3383         'kibibytes': 1024,
3384         'MiB': 1024 ** 2,
3385         'MB': 1000 ** 2,
3386         'mB': 1024 ** 2,
3387         'Mb': 1000 ** 2,
3388         'mb': 1000 ** 2,
3389         'megabytes': 1000 ** 2,
3390         'mebibytes': 1024 ** 2,
3391         'GiB': 1024 ** 3,
3392         'GB': 1000 ** 3,
3393         'gB': 1024 ** 3,
3394         'Gb': 1000 ** 3,
3395         'gb': 1000 ** 3,
3396         'gigabytes': 1000 ** 3,
3397         'gibibytes': 1024 ** 3,
3398         'TiB': 1024 ** 4,
3399         'TB': 1000 ** 4,
3400         'tB': 1024 ** 4,
3401         'Tb': 1000 ** 4,
3402         'tb': 1000 ** 4,
3403         'terabytes': 1000 ** 4,
3404         'tebibytes': 1024 ** 4,
3405         'PiB': 1024 ** 5,
3406         'PB': 1000 ** 5,
3407         'pB': 1024 ** 5,
3408         'Pb': 1000 ** 5,
3409         'pb': 1000 ** 5,
3410         'petabytes': 1000 ** 5,
3411         'pebibytes': 1024 ** 5,
3412         'EiB': 1024 ** 6,
3413         'EB': 1000 ** 6,
3414         'eB': 1024 ** 6,
3415         'Eb': 1000 ** 6,
3416         'eb': 1000 ** 6,
3417         'exabytes': 1000 ** 6,
3418         'exbibytes': 1024 ** 6,
3419         'ZiB': 1024 ** 7,
3420         'ZB': 1000 ** 7,
3421         'zB': 1024 ** 7,
3422         'Zb': 1000 ** 7,
3423         'zb': 1000 ** 7,
3424         'zettabytes': 1000 ** 7,
3425         'zebibytes': 1024 ** 7,
3426         'YiB': 1024 ** 8,
3427         'YB': 1000 ** 8,
3428         'yB': 1024 ** 8,
3429         'Yb': 1000 ** 8,
3430         'yb': 1000 ** 8,
3431         'yottabytes': 1000 ** 8,
3432         'yobibytes': 1024 ** 8,
3433     }
3434
3435     return lookup_unit_table(_UNIT_TABLE, s)
3436
3437
3438 def parse_count(s):
3439     if s is None:
3440         return None
3441
3442     s = s.strip()
3443
3444     if re.match(r'^[\d,.]+$', s):
3445         return str_to_int(s)
3446
3447     _UNIT_TABLE = {
3448         'k': 1000,
3449         'K': 1000,
3450         'm': 1000 ** 2,
3451         'M': 1000 ** 2,
3452         'kk': 1000 ** 2,
3453         'KK': 1000 ** 2,
3454     }
3455
3456     return lookup_unit_table(_UNIT_TABLE, s)
3457
3458
3459 def parse_resolution(s):
3460     if s is None:
3461         return {}
3462
3463     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3464     if mobj:
3465         return {
3466             'width': int(mobj.group('w')),
3467             'height': int(mobj.group('h')),
3468         }
3469
3470     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3471     if mobj:
3472         return {'height': int(mobj.group(1))}
3473
3474     mobj = re.search(r'\b([48])[kK]\b', s)
3475     if mobj:
3476         return {'height': int(mobj.group(1)) * 540}
3477
3478     return {}
3479
3480
3481 def parse_bitrate(s):
3482     if not isinstance(s, compat_str):
3483         return
3484     mobj = re.search(r'\b(\d+)\s*kbps', s)
3485     if mobj:
3486         return int(mobj.group(1))
3487
3488
3489 def month_by_name(name, lang='en'):
3490     """ Return the number of a month by (locale-independently) English name """
3491
3492     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3493
3494     try:
3495         return month_names.index(name) + 1
3496     except ValueError:
3497         return None
3498
3499
3500 def month_by_abbreviation(abbrev):
3501     """ Return the number of a month by (locale-independently) English
3502         abbreviations """
3503
3504     try:
3505         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3506     except ValueError:
3507         return None
3508
3509
3510 def fix_xml_ampersands(xml_str):
3511     """Replace all the '&' by '&amp;' in XML"""
3512     return re.sub(
3513         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3514         '&amp;',
3515         xml_str)
3516
3517
3518 def setproctitle(title):
3519     assert isinstance(title, compat_str)
3520
3521     # ctypes in Jython is not complete
3522     # http://bugs.jython.org/issue2148
3523     if sys.platform.startswith('java'):
3524         return
3525
3526     try:
3527         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3528     except OSError:
3529         return
3530     except TypeError:
3531         # LoadLibrary in Windows Python 2.7.13 only expects
3532         # a bytestring, but since unicode_literals turns
3533         # every string into a unicode string, it fails.
3534         return
3535     title_bytes = title.encode('utf-8')
3536     buf = ctypes.create_string_buffer(len(title_bytes))
3537     buf.value = title_bytes
3538     try:
3539         libc.prctl(15, buf, 0, 0, 0)
3540     except AttributeError:
3541         return  # Strange libc, just skip this
3542
3543
3544 def remove_start(s, start):
3545     return s[len(start):] if s is not None and s.startswith(start) else s
3546
3547
3548 def remove_end(s, end):
3549     return s[:-len(end)] if s is not None and s.endswith(end) else s
3550
3551
3552 def remove_quotes(s):
3553     if s is None or len(s) < 2:
3554         return s
3555     for quote in ('"', "'", ):
3556         if s[0] == quote and s[-1] == quote:
3557             return s[1:-1]
3558     return s
3559
3560
3561 def url_basename(url):
3562     path = compat_urlparse.urlparse(url).path
3563     return path.strip('/').split('/')[-1]
3564
3565
3566 def base_url(url):
3567     return re.match(r'https?://[^?#&]+/', url).group()
3568
3569
3570 def urljoin(base, path):
3571     if isinstance(path, bytes):
3572         path = path.decode('utf-8')
3573     if not isinstance(path, compat_str) or not path:
3574         return None
3575     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3576         return path
3577     if isinstance(base, bytes):
3578         base = base.decode('utf-8')
3579     if not isinstance(base, compat_str) or not re.match(
3580             r'^(?:https?:)?//', base):
3581         return None
3582     return compat_urlparse.urljoin(base, path)
3583
3584
3585 class HEADRequest(compat_urllib_request.Request):
3586     def get_method(self):
3587         return 'HEAD'
3588
3589
3590 class PUTRequest(compat_urllib_request.Request):
3591     def get_method(self):
3592         return 'PUT'
3593
3594
3595 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3596     if get_attr:
3597         if v is not None:
3598             v = getattr(v, get_attr, None)
3599     if v == '':
3600         v = None
3601     if v is None:
3602         return default
3603     try:
3604         return int(v) * invscale // scale
3605     except (ValueError, TypeError):
3606         return default
3607
3608
3609 def str_or_none(v, default=None):
3610     return default if v is None else compat_str(v)
3611
3612
3613 def str_to_int(int_str):
3614     """ A more relaxed version of int_or_none """
3615     if isinstance(int_str, compat_integer_types):
3616         return int_str
3617     elif isinstance(int_str, compat_str):
3618         int_str = re.sub(r'[,\.\+]', '', int_str)
3619         return int_or_none(int_str)
3620
3621
3622 def float_or_none(v, scale=1, invscale=1, default=None):
3623     if v is None:
3624         return default
3625     try:
3626         return float(v) * invscale / scale
3627     except (ValueError, TypeError):
3628         return default
3629
3630
3631 def bool_or_none(v, default=None):
3632     return v if isinstance(v, bool) else default
3633
3634
3635 def strip_or_none(v, default=None):
3636     return v.strip() if isinstance(v, compat_str) else default
3637
3638
3639 def url_or_none(url):
3640     if not url or not isinstance(url, compat_str):
3641         return None
3642     url = url.strip()
3643     return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
3644
3645
3646 def parse_duration(s):
3647     if not isinstance(s, compat_basestring):
3648         return None
3649
3650     s = s.strip()
3651
3652     days, hours, mins, secs, ms = [None] * 5
3653     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3654     if m:
3655         days, hours, mins, secs, ms = m.groups()
3656     else:
3657         m = re.match(
3658             r'''(?ix)(?:P?
3659                 (?:
3660                     [0-9]+\s*y(?:ears?)?\s*
3661                 )?
3662                 (?:
3663                     [0-9]+\s*m(?:onths?)?\s*
3664                 )?
3665                 (?:
3666                     [0-9]+\s*w(?:eeks?)?\s*
3667                 )?
3668                 (?:
3669                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3670                 )?
3671                 T)?
3672                 (?:
3673                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3674                 )?
3675                 (?:
3676                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3677                 )?
3678                 (?:
3679                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3680                 )?Z?$''', s)
3681         if m:
3682             days, hours, mins, secs, ms = m.groups()
3683         else:
3684             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3685             if m:
3686                 hours, mins = m.groups()
3687             else:
3688                 return None
3689
3690     duration = 0
3691     if secs:
3692         duration += float(secs)
3693     if mins:
3694         duration += float(mins) * 60
3695     if hours:
3696         duration += float(hours) * 60 * 60
3697     if days:
3698         duration += float(days) * 24 * 60 * 60
3699     if ms:
3700         duration += float(ms)
3701     return duration
3702
3703
3704 def prepend_extension(filename, ext, expected_real_ext=None):
3705     name, real_ext = os.path.splitext(filename)
3706     return (
3707         '{0}.{1}{2}'.format(name, ext, real_ext)
3708         if not expected_real_ext or real_ext[1:] == expected_real_ext
3709         else '{0}.{1}'.format(filename, ext))
3710
3711
3712 def replace_extension(filename, ext, expected_real_ext=None):
3713     name, real_ext = os.path.splitext(filename)
3714     return '{0}.{1}'.format(
3715         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3716         ext)
3717
3718
3719 def check_executable(exe, args=[]):
3720     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3721     args can be a list of arguments for a short output (like -version) """
3722     try:
3723         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
3724     except OSError:
3725         return False
3726     return exe
3727
3728
3729 def get_exe_version(exe, args=['--version'],
3730                     version_re=None, unrecognized='present'):
3731     """ Returns the version of the specified executable,
3732     or False if the executable is not present """
3733     try:
3734         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3735         # SIGTTOU if youtube-dl is run in the background.
3736         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3737         out, _ = subprocess.Popen(
3738             [encodeArgument(exe)] + args,
3739             stdin=subprocess.PIPE,
3740             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
3741     except OSError:
3742         return False
3743     if isinstance(out, bytes):  # Python 2.x
3744         out = out.decode('ascii', 'ignore')
3745     return detect_exe_version(out, version_re, unrecognized)
3746
3747
3748 def detect_exe_version(output, version_re=None, unrecognized='present'):
3749     assert isinstance(output, compat_str)
3750     if version_re is None:
3751         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3752     m = re.search(version_re, output)
3753     if m:
3754         return m.group(1)
3755     else:
3756         return unrecognized
3757
3758
3759 class PagedList(object):
3760     def __len__(self):
3761         # This is only useful for tests
3762         return len(self.getslice())
3763
3764
3765 class OnDemandPagedList(PagedList):
3766     def __init__(self, pagefunc, pagesize, use_cache=True):
3767         self._pagefunc = pagefunc
3768         self._pagesize = pagesize
3769         self._use_cache = use_cache
3770         if use_cache:
3771             self._cache = {}
3772
3773     def getslice(self, start=0, end=None):
3774         res = []
3775         for pagenum in itertools.count(start // self._pagesize):
3776             firstid = pagenum * self._pagesize
3777             nextfirstid = pagenum * self._pagesize + self._pagesize
3778             if start >= nextfirstid:
3779                 continue
3780
3781             page_results = None
3782             if self._use_cache:
3783                 page_results = self._cache.get(pagenum)
3784             if page_results is None:
3785                 page_results = list(self._pagefunc(pagenum))
3786             if self._use_cache:
3787                 self._cache[pagenum] = page_results
3788
3789             startv = (
3790                 start % self._pagesize
3791                 if firstid <= start < nextfirstid
3792                 else 0)
3793
3794             endv = (
3795                 ((end - 1) % self._pagesize) + 1
3796                 if (end is not None and firstid <= end <= nextfirstid)
3797                 else None)
3798
3799             if startv != 0 or endv is not None:
3800                 page_results = page_results[startv:endv]
3801             res.extend(page_results)
3802
3803             # A little optimization - if current page is not "full", ie. does
3804             # not contain page_size videos then we can assume that this page
3805             # is the last one - there are no more ids on further pages -
3806             # i.e. no need to query again.
3807             if len(page_results) + startv < self._pagesize:
3808                 break
3809
3810             # If we got the whole page, but the next page is not interesting,
3811             # break out early as well
3812             if end == nextfirstid:
3813                 break
3814         return res
3815
3816
3817 class InAdvancePagedList(PagedList):
3818     def __init__(self, pagefunc, pagecount, pagesize):
3819         self._pagefunc = pagefunc
3820         self._pagecount = pagecount
3821         self._pagesize = pagesize
3822
3823     def getslice(self, start=0, end=None):
3824         res = []
3825         start_page = start // self._pagesize
3826         end_page = (
3827             self._pagecount if end is None else (end // self._pagesize + 1))
3828         skip_elems = start - start_page * self._pagesize
3829         only_more = None if end is None else end - start
3830         for pagenum in range(start_page, end_page):
3831             page = list(self._pagefunc(pagenum))
3832             if skip_elems:
3833                 page = page[skip_elems:]
3834                 skip_elems = None
3835             if only_more is not None:
3836                 if len(page) < only_more:
3837                     only_more -= len(page)
3838                 else:
3839                     page = page[:only_more]
3840                     res.extend(page)
3841                     break
3842             res.extend(page)
3843         return res
3844
3845
3846 def uppercase_escape(s):
3847     unicode_escape = codecs.getdecoder('unicode_escape')
3848     return re.sub(
3849         r'\\U[0-9a-fA-F]{8}',
3850         lambda m: unicode_escape(m.group(0))[0],
3851         s)
3852
3853
3854 def lowercase_escape(s):
3855     unicode_escape = codecs.getdecoder('unicode_escape')
3856     return re.sub(
3857         r'\\u[0-9a-fA-F]{4}',
3858         lambda m: unicode_escape(m.group(0))[0],
3859         s)
3860
3861
3862 def escape_rfc3986(s):
3863     """Escape non-ASCII characters as suggested by RFC 3986"""
3864     if sys.version_info < (3, 0) and isinstance(s, compat_str):
3865         s = s.encode('utf-8')
3866     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3867
3868
3869 def escape_url(url):
3870     """Escape URL as suggested by RFC 3986"""
3871     url_parsed = compat_urllib_parse_urlparse(url)
3872     return url_parsed._replace(
3873         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3874         path=escape_rfc3986(url_parsed.path),
3875         params=escape_rfc3986(url_parsed.params),
3876         query=escape_rfc3986(url_parsed.query),
3877         fragment=escape_rfc3986(url_parsed.fragment)
3878     ).geturl()
3879
3880
3881 def read_batch_urls(batch_fd):
3882     def fixup(url):
3883         if not isinstance(url, compat_str):
3884             url = url.decode('utf-8', 'replace')
3885         BOM_UTF8 = '\xef\xbb\xbf'
3886         if url.startswith(BOM_UTF8):
3887             url = url[len(BOM_UTF8):]
3888         url = url.strip()
3889         if url.startswith(('#', ';', ']')):
3890             return False
3891         return url
3892
3893     with contextlib.closing(batch_fd) as fd:
3894         return [url for url in map(fixup, fd) if url]
3895
3896
3897 def urlencode_postdata(*args, **kargs):
3898     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3899
3900
3901 def update_url_query(url, query):
3902     if not query:
3903         return url
3904     parsed_url = compat_urlparse.urlparse(url)
3905     qs = compat_parse_qs(parsed_url.query)
3906     qs.update(query)
3907     return compat_urlparse.urlunparse(parsed_url._replace(
3908         query=compat_urllib_parse_urlencode(qs, True)))
3909
3910
3911 def update_Request(req, url=None, data=None, headers={}, query={}):
3912     req_headers = req.headers.copy()
3913     req_headers.update(headers)
3914     req_data = data or req.data
3915     req_url = update_url_query(url or req.get_full_url(), query)
3916     req_get_method = req.get_method()
3917     if req_get_method == 'HEAD':
3918         req_type = HEADRequest
3919     elif req_get_method == 'PUT':
3920         req_type = PUTRequest
3921     else:
3922         req_type = compat_urllib_request.Request
3923     new_req = req_type(
3924         req_url, data=req_data, headers=req_headers,
3925         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3926     if hasattr(req, 'timeout'):
3927         new_req.timeout = req.timeout
3928     return new_req
3929
3930
3931 def _multipart_encode_impl(data, boundary):
3932     content_type = 'multipart/form-data; boundary=%s' % boundary
3933
3934     out = b''
3935     for k, v in data.items():
3936         out += b'--' + boundary.encode('ascii') + b'\r\n'
3937         if isinstance(k, compat_str):
3938             k = k.encode('utf-8')
3939         if isinstance(v, compat_str):
3940             v = v.encode('utf-8')
3941         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3942         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3943         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3944         if boundary.encode('ascii') in content:
3945             raise ValueError('Boundary overlaps with data')
3946         out += content
3947
3948     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3949
3950     return out, content_type
3951
3952
3953 def multipart_encode(data, boundary=None):
3954     '''
3955     Encode a dict to RFC 7578-compliant form-data
3956
3957     data:
3958         A dict where keys and values can be either Unicode or bytes-like
3959         objects.
3960     boundary:
3961         If specified a Unicode object, it's used as the boundary. Otherwise
3962         a random boundary is generated.
3963
3964     Reference: https://tools.ietf.org/html/rfc7578
3965     '''
3966     has_specified_boundary = boundary is not None
3967
3968     while True:
3969         if boundary is None:
3970             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3971
3972         try:
3973             out, content_type = _multipart_encode_impl(data, boundary)
3974             break
3975         except ValueError:
3976             if has_specified_boundary:
3977                 raise
3978             boundary = None
3979
3980     return out, content_type
3981
3982
3983 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3984     if isinstance(key_or_keys, (list, tuple)):
3985         for key in key_or_keys:
3986             if key not in d or d[key] is None or skip_false_values and not d[key]:
3987                 continue
3988             return d[key]
3989         return default
3990     return d.get(key_or_keys, default)
3991
3992
3993 def try_get(src, getter, expected_type=None):
3994     if not isinstance(getter, (list, tuple)):
3995         getter = [getter]
3996     for get in getter:
3997         try:
3998             v = get(src)
3999         except (AttributeError, KeyError, TypeError, IndexError):
4000             pass
4001         else:
4002             if expected_type is None or isinstance(v, expected_type):
4003                 return v
4004
4005
4006 def merge_dicts(*dicts):
4007     merged = {}
4008     for a_dict in dicts:
4009         for k, v in a_dict.items():
4010             if v is None:
4011                 continue
4012             if (k not in merged
4013                     or (isinstance(v, compat_str) and v
4014                         and isinstance(merged[k], compat_str)
4015                         and not merged[k])):
4016                 merged[k] = v
4017     return merged
4018
4019
4020 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4021     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4022
4023
4024 US_RATINGS = {
4025     'G': 0,
4026     'PG': 10,
4027     'PG-13': 13,
4028     'R': 16,
4029     'NC': 18,
4030 }
4031
4032
4033 TV_PARENTAL_GUIDELINES = {
4034     'TV-Y': 0,
4035     'TV-Y7': 7,
4036     'TV-G': 0,
4037     'TV-PG': 0,
4038     'TV-14': 14,
4039     'TV-MA': 17,
4040 }
4041
4042
4043 def parse_age_limit(s):
4044     if type(s) == int:
4045         return s if 0 <= s <= 21 else None
4046     if not isinstance(s, compat_basestring):
4047         return None
4048     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4049     if m:
4050         return int(m.group('age'))
4051     if s in US_RATINGS:
4052         return US_RATINGS[s]
4053     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4054     if m:
4055         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4056     return None
4057
4058
4059 def strip_jsonp(code):
4060     return re.sub(
4061         r'''(?sx)^
4062             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4063             (?:\s*&&\s*(?P=func_name))?
4064             \s*\(\s*(?P<callback_data>.*)\);?
4065             \s*?(?://[^\n]*)*$''',
4066         r'\g<callback_data>', code)
4067
4068
4069 def js_to_json(code):
4070     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4071     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4072     INTEGER_TABLE = (
4073         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4074         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4075     )
4076
4077     def fix_kv(m):
4078         v = m.group(0)
4079         if v in ('true', 'false', 'null'):
4080             return v
4081         elif v.startswith('/*') or v.startswith('//') or v == ',':
4082             return ""
4083
4084         if v[0] in ("'", '"'):
4085             v = re.sub(r'(?s)\\.|"', lambda m: {
4086                 '"': '\\"',
4087                 "\\'": "'",
4088                 '\\\n': '',
4089                 '\\x': '\\u00',
4090             }.get(m.group(0), m.group(0)), v[1:-1])
4091
4092         for regex, base in INTEGER_TABLE:
4093             im = re.match(regex, v)
4094             if im:
4095                 i = int(im.group(1), base)
4096                 return '"%d":' % i if v.endswith(':') else '%d' % i
4097
4098         return '"%s"' % v
4099
4100     return re.sub(r'''(?sx)
4101         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4102         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4103         {comment}|,(?={skip}[\]}}])|
4104         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4105         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4106         [0-9]+(?={skip}:)
4107         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4108
4109
4110 def qualities(quality_ids):
4111     """ Get a numeric quality value out of a list of possible values """
4112     def q(qid):
4113         try:
4114             return quality_ids.index(qid)
4115         except ValueError:
4116             return -1
4117     return q
4118
4119
4120 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4121
4122
4123 def limit_length(s, length):
4124     """ Add ellipses to overly long strings """
4125     if s is None:
4126         return None
4127     ELLIPSES = '...'
4128     if len(s) > length:
4129         return s[:length - len(ELLIPSES)] + ELLIPSES
4130     return s
4131
4132
4133 def version_tuple(v):
4134     return tuple(int(e) for e in re.split(r'[-.]', v))
4135
4136
4137 def is_outdated_version(version, limit, assume_new=True):
4138     if not version:
4139         return not assume_new
4140     try:
4141         return version_tuple(version) < version_tuple(limit)
4142     except ValueError:
4143         return not assume_new
4144
4145
4146 def ytdl_is_updateable():
4147     """ Returns if youtube-dl can be updated with -U """
4148     from zipimport import zipimporter
4149
4150     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4151
4152
4153 def args_to_str(args):
4154     # Get a short string representation for a subprocess command
4155     return ' '.join(compat_shlex_quote(a) for a in args)
4156
4157
4158 def error_to_compat_str(err):
4159     err_str = str(err)
4160     # On python 2 error byte string must be decoded with proper
4161     # encoding rather than ascii
4162     if sys.version_info[0] < 3:
4163         err_str = err_str.decode(preferredencoding())
4164     return err_str
4165
4166
4167 def mimetype2ext(mt):
4168     if mt is None:
4169         return None
4170
4171     ext = {
4172         'audio/mp4': 'm4a',
4173         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4174         # it's the most popular one
4175         'audio/mpeg': 'mp3',
4176     }.get(mt)
4177     if ext is not None:
4178         return ext
4179
4180     _, _, res = mt.rpartition('/')
4181     res = res.split(';')[0].strip().lower()
4182
4183     return {
4184         '3gpp': '3gp',
4185         'smptett+xml': 'tt',
4186         'ttaf+xml': 'dfxp',
4187         'ttml+xml': 'ttml',
4188         'x-flv': 'flv',
4189         'x-mp4-fragmented': 'mp4',
4190         'x-ms-sami': 'sami',
4191         'x-ms-wmv': 'wmv',
4192         'mpegurl': 'm3u8',
4193         'x-mpegurl': 'm3u8',
4194         'vnd.apple.mpegurl': 'm3u8',
4195         'dash+xml': 'mpd',
4196         'f4m+xml': 'f4m',
4197         'hds+xml': 'f4m',
4198         'vnd.ms-sstr+xml': 'ism',
4199         'quicktime': 'mov',
4200         'mp2t': 'ts',
4201         'x-wav': 'wav',
4202     }.get(res, res)
4203
4204
4205 def parse_codecs(codecs_str):
4206     # http://tools.ietf.org/html/rfc6381
4207     if not codecs_str:
4208         return {}
4209     splited_codecs = list(filter(None, map(
4210         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4211     vcodec, acodec = None, None
4212     for full_codec in splited_codecs:
4213         codec = full_codec.split('.')[0]
4214         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4215             if not vcodec:
4216                 vcodec = full_codec
4217         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4218             if not acodec:
4219                 acodec = full_codec
4220         else:
4221             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4222     if not vcodec and not acodec:
4223         if len(splited_codecs) == 2:
4224             return {
4225                 'vcodec': splited_codecs[0],
4226                 'acodec': splited_codecs[1],
4227             }
4228     else:
4229         return {
4230             'vcodec': vcodec or 'none',
4231             'acodec': acodec or 'none',
4232         }
4233     return {}
4234
4235
4236 def urlhandle_detect_ext(url_handle):
4237     getheader = url_handle.headers.get
4238
4239     cd = getheader('Content-Disposition')
4240     if cd:
4241         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4242         if m:
4243             e = determine_ext(m.group('filename'), default_ext=None)
4244             if e:
4245                 return e
4246
4247     return mimetype2ext(getheader('Content-Type'))
4248
4249
4250 def encode_data_uri(data, mime_type):
4251     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4252
4253
4254 def age_restricted(content_limit, age_limit):
4255     """ Returns True iff the content should be blocked """
4256
4257     if age_limit is None:  # No limit set
4258         return False
4259     if content_limit is None:
4260         return False  # Content available for everyone
4261     return age_limit < content_limit
4262
4263
4264 def is_html(first_bytes):
4265     """ Detect whether a file contains HTML by examining its first bytes. """
4266
4267     BOMS = [
4268         (b'\xef\xbb\xbf', 'utf-8'),
4269         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4270         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4271         (b'\xff\xfe', 'utf-16-le'),
4272         (b'\xfe\xff', 'utf-16-be'),
4273     ]
4274     for bom, enc in BOMS:
4275         if first_bytes.startswith(bom):
4276             s = first_bytes[len(bom):].decode(enc, 'replace')
4277             break
4278     else:
4279         s = first_bytes.decode('utf-8', 'replace')
4280
4281     return re.match(r'^\s*<', s)
4282
4283
4284 def determine_protocol(info_dict):
4285     protocol = info_dict.get('protocol')
4286     if protocol is not None:
4287         return protocol
4288
4289     url = info_dict['url']
4290     if url.startswith('rtmp'):
4291         return 'rtmp'
4292     elif url.startswith('mms'):
4293         return 'mms'
4294     elif url.startswith('rtsp'):
4295         return 'rtsp'
4296
4297     ext = determine_ext(url)
4298     if ext == 'm3u8':
4299         return 'm3u8'
4300     elif ext == 'f4m':
4301         return 'f4m'
4302
4303     return compat_urllib_parse_urlparse(url).scheme
4304
4305
4306 def render_table(header_row, data):
4307     """ Render a list of rows, each as a list of values """
4308     table = [header_row] + data
4309     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4310     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
4311     return '\n'.join(format_str % tuple(row) for row in table)
4312
4313
4314 def _match_one(filter_part, dct):
4315     COMPARISON_OPERATORS = {
4316         '<': operator.lt,
4317         '<=': operator.le,
4318         '>': operator.gt,
4319         '>=': operator.ge,
4320         '=': operator.eq,
4321         '!=': operator.ne,
4322     }
4323     operator_rex = re.compile(r'''(?x)\s*
4324         (?P<key>[a-z_]+)
4325         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4326         (?:
4327             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4328             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4329             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4330         )
4331         \s*$
4332         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4333     m = operator_rex.search(filter_part)
4334     if m:
4335         op = COMPARISON_OPERATORS[m.group('op')]
4336         actual_value = dct.get(m.group('key'))
4337         if (m.group('quotedstrval') is not None
4338             or m.group('strval') is not None
4339             # If the original field is a string and matching comparisonvalue is
4340             # a number we should respect the origin of the original field
4341             # and process comparison value as a string (see
4342             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4343             or actual_value is not None and m.group('intval') is not None
4344                 and isinstance(actual_value, compat_str)):
4345             if m.group('op') not in ('=', '!='):
4346                 raise ValueError(
4347                     'Operator %s does not support string values!' % m.group('op'))
4348             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4349             quote = m.group('quote')
4350             if quote is not None:
4351                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4352         else:
4353             try:
4354                 comparison_value = int(m.group('intval'))
4355             except ValueError:
4356                 comparison_value = parse_filesize(m.group('intval'))
4357                 if comparison_value is None:
4358                     comparison_value = parse_filesize(m.group('intval') + 'B')
4359                 if comparison_value is None:
4360                     raise ValueError(
4361                         'Invalid integer value %r in filter part %r' % (
4362                             m.group('intval'), filter_part))
4363         if actual_value is None:
4364             return m.group('none_inclusive')
4365         return op(actual_value, comparison_value)
4366
4367     UNARY_OPERATORS = {
4368         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4369         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4370     }
4371     operator_rex = re.compile(r'''(?x)\s*
4372         (?P<op>%s)\s*(?P<key>[a-z_]+)
4373         \s*$
4374         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4375     m = operator_rex.search(filter_part)
4376     if m:
4377         op = UNARY_OPERATORS[m.group('op')]
4378         actual_value = dct.get(m.group('key'))
4379         return op(actual_value)
4380
4381     raise ValueError('Invalid filter part %r' % filter_part)
4382
4383
4384 def match_str(filter_str, dct):
4385     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4386
4387     return all(
4388         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4389
4390
4391 def match_filter_func(filter_str):
4392     def _match_func(info_dict):
4393         if match_str(filter_str, info_dict):
4394             return None
4395         else:
4396             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4397             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4398     return _match_func
4399
4400
4401 def parse_dfxp_time_expr(time_expr):
4402     if not time_expr:
4403         return
4404
4405     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4406     if mobj:
4407         return float(mobj.group('time_offset'))
4408
4409     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4410     if mobj:
4411         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4412
4413
4414 def srt_subtitles_timecode(seconds):
4415     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4416
4417
4418 def dfxp2srt(dfxp_data):
4419     '''
4420     @param dfxp_data A bytes-like object containing DFXP data
4421     @returns A unicode object containing converted SRT data
4422     '''
4423     LEGACY_NAMESPACES = (
4424         (b'http://www.w3.org/ns/ttml', [
4425             b'http://www.w3.org/2004/11/ttaf1',
4426             b'http://www.w3.org/2006/04/ttaf1',
4427             b'http://www.w3.org/2006/10/ttaf1',
4428         ]),
4429         (b'http://www.w3.org/ns/ttml#styling', [
4430             b'http://www.w3.org/ns/ttml#style',
4431         ]),
4432     )
4433
4434     SUPPORTED_STYLING = [
4435         'color',
4436         'fontFamily',
4437         'fontSize',
4438         'fontStyle',
4439         'fontWeight',
4440         'textDecoration'
4441     ]
4442
4443     _x = functools.partial(xpath_with_ns, ns_map={
4444         'xml': 'http://www.w3.org/XML/1998/namespace',
4445         'ttml': 'http://www.w3.org/ns/ttml',
4446         'tts': 'http://www.w3.org/ns/ttml#styling',
4447     })
4448
4449     styles = {}
4450     default_style = {}
4451
4452     class TTMLPElementParser(object):
4453         _out = ''
4454         _unclosed_elements = []
4455         _applied_styles = []
4456
4457         def start(self, tag, attrib):
4458             if tag in (_x('ttml:br'), 'br'):
4459                 self._out += '\n'
4460             else:
4461                 unclosed_elements = []
4462                 style = {}
4463                 element_style_id = attrib.get('style')
4464                 if default_style:
4465                     style.update(default_style)
4466                 if element_style_id:
4467                     style.update(styles.get(element_style_id, {}))
4468                 for prop in SUPPORTED_STYLING:
4469                     prop_val = attrib.get(_x('tts:' + prop))
4470                     if prop_val:
4471                         style[prop] = prop_val
4472                 if style:
4473                     font = ''
4474                     for k, v in sorted(style.items()):
4475                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4476                             continue
4477                         if k == 'color':
4478                             font += ' color="%s"' % v
4479                         elif k == 'fontSize':
4480                             font += ' size="%s"' % v
4481                         elif k == 'fontFamily':
4482                             font += ' face="%s"' % v
4483                         elif k == 'fontWeight' and v == 'bold':
4484                             self._out += '<b>'
4485                             unclosed_elements.append('b')
4486                         elif k == 'fontStyle' and v == 'italic':
4487                             self._out += '<i>'
4488                             unclosed_elements.append('i')
4489                         elif k == 'textDecoration' and v == 'underline':
4490                             self._out += '<u>'
4491                             unclosed_elements.append('u')
4492                     if font:
4493                         self._out += '<font' + font + '>'
4494                         unclosed_elements.append('font')
4495                     applied_style = {}
4496                     if self._applied_styles:
4497                         applied_style.update(self._applied_styles[-1])
4498                     applied_style.update(style)
4499                     self._applied_styles.append(applied_style)
4500                 self._unclosed_elements.append(unclosed_elements)
4501
4502         def end(self, tag):
4503             if tag not in (_x('ttml:br'), 'br'):
4504                 unclosed_elements = self._unclosed_elements.pop()
4505                 for element in reversed(unclosed_elements):
4506                     self._out += '</%s>' % element
4507                 if unclosed_elements and self._applied_styles:
4508                     self._applied_styles.pop()
4509
4510         def data(self, data):
4511             self._out += data
4512
4513         def close(self):
4514             return self._out.strip()
4515
4516     def parse_node(node):
4517         target = TTMLPElementParser()
4518         parser = xml.etree.ElementTree.XMLParser(target=target)
4519         parser.feed(xml.etree.ElementTree.tostring(node))
4520         return parser.close()
4521
4522     for k, v in LEGACY_NAMESPACES:
4523         for ns in v:
4524             dfxp_data = dfxp_data.replace(ns, k)
4525
4526     dfxp = compat_etree_fromstring(dfxp_data)
4527     out = []
4528     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4529
4530     if not paras:
4531         raise ValueError('Invalid dfxp/TTML subtitle')
4532
4533     repeat = False
4534     while True:
4535         for style in dfxp.findall(_x('.//ttml:style')):
4536             style_id = style.get('id') or style.get(_x('xml:id'))
4537             if not style_id:
4538                 continue
4539             parent_style_id = style.get('style')
4540             if parent_style_id:
4541                 if parent_style_id not in styles:
4542                     repeat = True
4543                     continue
4544                 styles[style_id] = styles[parent_style_id].copy()
4545             for prop in SUPPORTED_STYLING:
4546                 prop_val = style.get(_x('tts:' + prop))
4547                 if prop_val:
4548                     styles.setdefault(style_id, {})[prop] = prop_val
4549         if repeat:
4550             repeat = False
4551         else:
4552             break
4553
4554     for p in ('body', 'div'):
4555         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4556         if ele is None:
4557             continue
4558         style = styles.get(ele.get('style'))
4559         if not style:
4560             continue
4561         default_style.update(style)
4562
4563     for para, index in zip(paras, itertools.count(1)):
4564         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4565         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4566         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4567         if begin_time is None:
4568             continue
4569         if not end_time:
4570             if not dur:
4571                 continue
4572             end_time = begin_time + dur
4573         out.append('%d\n%s --> %s\n%s\n\n' % (
4574             index,
4575             srt_subtitles_timecode(begin_time),
4576             srt_subtitles_timecode(end_time),
4577             parse_node(para)))
4578
4579     return ''.join(out)
4580
4581
4582 def cli_option(params, command_option, param):
4583     param = params.get(param)
4584     if param:
4585         param = compat_str(param)
4586     return [command_option, param] if param is not None else []
4587
4588
4589 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4590     param = params.get(param)
4591     if param is None:
4592         return []
4593     assert isinstance(param, bool)
4594     if separator:
4595         return [command_option + separator + (true_value if param else false_value)]
4596     return [command_option, true_value if param else false_value]
4597
4598
4599 def cli_valueless_option(params, command_option, param, expected_value=True):
4600     param = params.get(param)
4601     return [command_option] if param == expected_value else []
4602
4603
4604 def cli_configuration_args(params, param, default=[]):
4605     ex_args = params.get(param)
4606     if ex_args is None:
4607         return default
4608     assert isinstance(ex_args, list)
4609     return ex_args
4610
4611
4612 class ISO639Utils(object):
4613     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4614     _lang_map = {
4615         'aa': 'aar',
4616         'ab': 'abk',
4617         'ae': 'ave',
4618         'af': 'afr',
4619         'ak': 'aka',
4620         'am': 'amh',
4621         'an': 'arg',
4622         'ar': 'ara',
4623         'as': 'asm',
4624         'av': 'ava',
4625         'ay': 'aym',
4626         'az': 'aze',
4627         'ba': 'bak',
4628         'be': 'bel',
4629         'bg': 'bul',
4630         'bh': 'bih',
4631         'bi': 'bis',
4632         'bm': 'bam',
4633         'bn': 'ben',
4634         'bo': 'bod',
4635         'br': 'bre',
4636         'bs': 'bos',
4637         'ca': 'cat',
4638         'ce': 'che',
4639         'ch': 'cha',
4640         'co': 'cos',
4641         'cr': 'cre',
4642         'cs': 'ces',
4643         'cu': 'chu',
4644         'cv': 'chv',
4645         'cy': 'cym',
4646         'da': 'dan',
4647         'de': 'deu',
4648         'dv': 'div',
4649         'dz': 'dzo',
4650         'ee': 'ewe',
4651         'el': 'ell',
4652         'en': 'eng',
4653         'eo': 'epo',
4654         'es': 'spa',
4655         'et': 'est',
4656         'eu': 'eus',
4657         'fa': 'fas',
4658         'ff': 'ful',
4659         'fi': 'fin',
4660         'fj': 'fij',
4661         'fo': 'fao',
4662         'fr': 'fra',
4663         'fy': 'fry',
4664         'ga': 'gle',
4665         'gd': 'gla',
4666         'gl': 'glg',
4667         'gn': 'grn',
4668         'gu': 'guj',
4669         'gv': 'glv',
4670         'ha': 'hau',
4671         'he': 'heb',
4672         'iw': 'heb',  # Replaced by he in 1989 revision
4673         'hi': 'hin',
4674         'ho': 'hmo',
4675         'hr': 'hrv',
4676         'ht': 'hat',
4677         'hu': 'hun',
4678         'hy': 'hye',
4679         'hz': 'her',
4680         'ia': 'ina',
4681         'id': 'ind',
4682         'in': 'ind',  # Replaced by id in 1989 revision
4683         'ie': 'ile',
4684         'ig': 'ibo',
4685         'ii': 'iii',
4686         'ik': 'ipk',
4687         'io': 'ido',
4688         'is': 'isl',
4689         'it': 'ita',
4690         'iu': 'iku',
4691         'ja': 'jpn',
4692         'jv': 'jav',
4693         'ka': 'kat',
4694         'kg': 'kon',
4695         'ki': 'kik',
4696         'kj': 'kua',
4697         'kk': 'kaz',
4698         'kl': 'kal',
4699         'km': 'khm',
4700         'kn': 'kan',
4701         'ko': 'kor',
4702         'kr': 'kau',
4703         'ks': 'kas',
4704         'ku': 'kur',
4705         'kv': 'kom',
4706         'kw': 'cor',
4707         'ky': 'kir',
4708         'la': 'lat',
4709         'lb': 'ltz',
4710         'lg': 'lug',
4711         'li': 'lim',
4712         'ln': 'lin',
4713         'lo': 'lao',
4714         'lt': 'lit',
4715         'lu': 'lub',
4716         'lv': 'lav',
4717         'mg': 'mlg',
4718         'mh': 'mah',
4719         'mi': 'mri',
4720         'mk': 'mkd',
4721         'ml': 'mal',
4722         'mn': 'mon',
4723         'mr': 'mar',
4724         'ms': 'msa',
4725         'mt': 'mlt',
4726         'my': 'mya',
4727         'na': 'nau',
4728         'nb': 'nob',
4729         'nd': 'nde',
4730         'ne': 'nep',
4731         'ng': 'ndo',
4732         'nl': 'nld',
4733         'nn': 'nno',
4734         'no': 'nor',
4735         'nr': 'nbl',
4736         'nv': 'nav',
4737         'ny': 'nya',
4738         'oc': 'oci',
4739         'oj': 'oji',
4740         'om': 'orm',
4741         'or': 'ori',
4742         'os': 'oss',
4743         'pa': 'pan',
4744         'pi': 'pli',
4745         'pl': 'pol',
4746         'ps': 'pus',
4747         'pt': 'por',
4748         'qu': 'que',
4749         'rm': 'roh',
4750         'rn': 'run',
4751         'ro': 'ron',
4752         'ru': 'rus',
4753         'rw': 'kin',
4754         'sa': 'san',
4755         'sc': 'srd',
4756         'sd': 'snd',
4757         'se': 'sme',
4758         'sg': 'sag',
4759         'si': 'sin',
4760         'sk': 'slk',
4761         'sl': 'slv',
4762         'sm': 'smo',
4763         'sn': 'sna',
4764         'so': 'som',
4765         'sq': 'sqi',
4766         'sr': 'srp',
4767         'ss': 'ssw',
4768         'st': 'sot',
4769         'su': 'sun',
4770         'sv': 'swe',
4771         'sw': 'swa',
4772         'ta': 'tam',
4773         'te': 'tel',
4774         'tg': 'tgk',
4775         'th': 'tha',
4776         'ti': 'tir',
4777         'tk': 'tuk',
4778         'tl': 'tgl',
4779         'tn': 'tsn',
4780         'to': 'ton',
4781         'tr': 'tur',
4782         'ts': 'tso',
4783         'tt': 'tat',
4784         'tw': 'twi',
4785         'ty': 'tah',
4786         'ug': 'uig',
4787         'uk': 'ukr',
4788         'ur': 'urd',
4789         'uz': 'uzb',
4790         've': 'ven',
4791         'vi': 'vie',
4792         'vo': 'vol',
4793         'wa': 'wln',
4794         'wo': 'wol',
4795         'xh': 'xho',
4796         'yi': 'yid',
4797         'ji': 'yid',  # Replaced by yi in 1989 revision
4798         'yo': 'yor',
4799         'za': 'zha',
4800         'zh': 'zho',
4801         'zu': 'zul',
4802     }
4803
4804     @classmethod
4805     def short2long(cls, code):
4806         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4807         return cls._lang_map.get(code[:2])
4808
4809     @classmethod
4810     def long2short(cls, code):
4811         """Convert language code from ISO 639-2/T to ISO 639-1"""
4812         for short_name, long_name in cls._lang_map.items():
4813             if long_name == code:
4814                 return short_name
4815
4816
4817 class ISO3166Utils(object):
4818     # From http://data.okfn.org/data/core/country-list
4819     _country_map = {
4820         'AF': 'Afghanistan',
4821         'AX': 'Åland Islands',
4822         'AL': 'Albania',
4823         'DZ': 'Algeria',
4824         'AS': 'American Samoa',
4825         'AD': 'Andorra',
4826         'AO': 'Angola',
4827         'AI': 'Anguilla',
4828         'AQ': 'Antarctica',
4829         'AG': 'Antigua and Barbuda',
4830         'AR': 'Argentina',
4831         'AM': 'Armenia',
4832         'AW': 'Aruba',
4833         'AU': 'Australia',
4834         'AT': 'Austria',
4835         'AZ': 'Azerbaijan',
4836         'BS': 'Bahamas',
4837         'BH': 'Bahrain',
4838         'BD': 'Bangladesh',
4839         'BB': 'Barbados',
4840         'BY': 'Belarus',
4841         'BE': 'Belgium',
4842         'BZ': 'Belize',
4843         'BJ': 'Benin',
4844         'BM': 'Bermuda',
4845         'BT': 'Bhutan',
4846         'BO': 'Bolivia, Plurinational State of',
4847         'BQ': 'Bonaire, Sint Eustatius and Saba',
4848         'BA': 'Bosnia and Herzegovina',
4849         'BW': 'Botswana',
4850         'BV': 'Bouvet Island',
4851         'BR': 'Brazil',
4852         'IO': 'British Indian Ocean Territory',
4853         'BN': 'Brunei Darussalam',
4854         'BG': 'Bulgaria',
4855         'BF': 'Burkina Faso',
4856         'BI': 'Burundi',
4857         'KH': 'Cambodia',
4858         'CM': 'Cameroon',
4859         'CA': 'Canada',
4860         'CV': 'Cape Verde',
4861         'KY': 'Cayman Islands',
4862         'CF': 'Central African Republic',
4863         'TD': 'Chad',
4864         'CL': 'Chile',
4865         'CN': 'China',
4866         'CX': 'Christmas Island',
4867         'CC': 'Cocos (Keeling) Islands',
4868         'CO': 'Colombia',
4869         'KM': 'Comoros',
4870         'CG': 'Congo',
4871         'CD': 'Congo, the Democratic Republic of the',
4872         'CK': 'Cook Islands',
4873         'CR': 'Costa Rica',
4874         'CI': 'Côte d\'Ivoire',
4875         'HR': 'Croatia',
4876         'CU': 'Cuba',
4877         'CW': 'Curaçao',
4878         'CY': 'Cyprus',
4879         'CZ': 'Czech Republic',
4880         'DK': 'Denmark',
4881         'DJ': 'Djibouti',
4882         'DM': 'Dominica',
4883         'DO': 'Dominican Republic',
4884         'EC': 'Ecuador',
4885         'EG': 'Egypt',
4886         'SV': 'El Salvador',
4887         'GQ': 'Equatorial Guinea',
4888         'ER': 'Eritrea',
4889         'EE': 'Estonia',
4890         'ET': 'Ethiopia',
4891         'FK': 'Falkland Islands (Malvinas)',
4892         'FO': 'Faroe Islands',
4893         'FJ': 'Fiji',
4894         'FI': 'Finland',
4895         'FR': 'France',
4896         'GF': 'French Guiana',
4897         'PF': 'French Polynesia',
4898         'TF': 'French Southern Territories',
4899         'GA': 'Gabon',
4900         'GM': 'Gambia',
4901         'GE': 'Georgia',
4902         'DE': 'Germany',
4903         'GH': 'Ghana',
4904         'GI': 'Gibraltar',
4905         'GR': 'Greece',
4906         'GL': 'Greenland',
4907         'GD': 'Grenada',
4908         'GP': 'Guadeloupe',
4909         'GU': 'Guam',
4910         'GT': 'Guatemala',
4911         'GG': 'Guernsey',
4912         'GN': 'Guinea',
4913         'GW': 'Guinea-Bissau',
4914         'GY': 'Guyana',
4915         'HT': 'Haiti',
4916         'HM': 'Heard Island and McDonald Islands',
4917         'VA': 'Holy See (Vatican City State)',
4918         'HN': 'Honduras',
4919         'HK': 'Hong Kong',
4920         'HU': 'Hungary',
4921         'IS': 'Iceland',
4922         'IN': 'India',
4923         'ID': 'Indonesia',
4924         'IR': 'Iran, Islamic Republic of',
4925         'IQ': 'Iraq',
4926         'IE': 'Ireland',
4927         'IM': 'Isle of Man',
4928         'IL': 'Israel',
4929         'IT': 'Italy',
4930         'JM': 'Jamaica',
4931         'JP': 'Japan',
4932         'JE': 'Jersey',
4933         'JO': 'Jordan',
4934         'KZ': 'Kazakhstan',
4935         'KE': 'Kenya',
4936         'KI': 'Kiribati',
4937         'KP': 'Korea, Democratic People\'s Republic of',
4938         'KR': 'Korea, Republic of',
4939         'KW': 'Kuwait',
4940         'KG': 'Kyrgyzstan',
4941         'LA': 'Lao People\'s Democratic Republic',
4942         'LV': 'Latvia',
4943         'LB': 'Lebanon',
4944         'LS': 'Lesotho',
4945         'LR': 'Liberia',
4946         'LY': 'Libya',
4947         'LI': 'Liechtenstein',
4948         'LT': 'Lithuania',
4949         'LU': 'Luxembourg',
4950         'MO': 'Macao',
4951         'MK': 'Macedonia, the Former Yugoslav Republic of',
4952         'MG': 'Madagascar',
4953         'MW': 'Malawi',
4954         'MY': 'Malaysia',
4955         'MV': 'Maldives',
4956         'ML': 'Mali',
4957         'MT': 'Malta',
4958         'MH': 'Marshall Islands',
4959         'MQ': 'Martinique',
4960         'MR': 'Mauritania',
4961         'MU': 'Mauritius',
4962         'YT': 'Mayotte',
4963         'MX': 'Mexico',
4964         'FM': 'Micronesia, Federated States of',
4965         'MD': 'Moldova, Republic of',
4966         'MC': 'Monaco',
4967         'MN': 'Mongolia',
4968         'ME': 'Montenegro',
4969         'MS': 'Montserrat',
4970         'MA': 'Morocco',
4971         'MZ': 'Mozambique',
4972         'MM': 'Myanmar',
4973         'NA': 'Namibia',
4974         'NR': 'Nauru',
4975         'NP': 'Nepal',
4976         'NL': 'Netherlands',
4977         'NC': 'New Caledonia',
4978         'NZ': 'New Zealand',
4979         'NI': 'Nicaragua',
4980         'NE': 'Niger',
4981         'NG': 'Nigeria',
4982         'NU': 'Niue',
4983         'NF': 'Norfolk Island',
4984         'MP': 'Northern Mariana Islands',
4985         'NO': 'Norway',
4986         'OM': 'Oman',
4987         'PK': 'Pakistan',
4988         'PW': 'Palau',
4989         'PS': 'Palestine, State of',
4990         'PA': 'Panama',
4991         'PG': 'Papua New Guinea',
4992         'PY': 'Paraguay',
4993         'PE': 'Peru',
4994         'PH': 'Philippines',
4995         'PN': 'Pitcairn',
4996         'PL': 'Poland',
4997         'PT': 'Portugal',
4998         'PR': 'Puerto Rico',
4999         'QA': 'Qatar',
5000         'RE': 'Réunion',
5001         'RO': 'Romania',
5002         'RU': 'Russian Federation',
5003         'RW': 'Rwanda',
5004         'BL': 'Saint Barthélemy',
5005         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5006         'KN': 'Saint Kitts and Nevis',
5007         'LC': 'Saint Lucia',
5008         'MF': 'Saint Martin (French part)',
5009         'PM': 'Saint Pierre and Miquelon',
5010         'VC': 'Saint Vincent and the Grenadines',
5011         'WS': 'Samoa',
5012         'SM': 'San Marino',
5013         'ST': 'Sao Tome and Principe',
5014         'SA': 'Saudi Arabia',
5015         'SN': 'Senegal',
5016         'RS': 'Serbia',
5017         'SC': 'Seychelles',
5018         'SL': 'Sierra Leone',
5019         'SG': 'Singapore',
5020         'SX': 'Sint Maarten (Dutch part)',
5021         'SK': 'Slovakia',
5022         'SI': 'Slovenia',
5023         'SB': 'Solomon Islands',
5024         'SO': 'Somalia',
5025         'ZA': 'South Africa',
5026         'GS': 'South Georgia and the South Sandwich Islands',
5027         'SS': 'South Sudan',
5028         'ES': 'Spain',
5029         'LK': 'Sri Lanka',
5030         'SD': 'Sudan',
5031         'SR': 'Suriname',
5032         'SJ': 'Svalbard and Jan Mayen',
5033         'SZ': 'Swaziland',
5034         'SE': 'Sweden',
5035         'CH': 'Switzerland',
5036         'SY': 'Syrian Arab Republic',
5037         'TW': 'Taiwan, Province of China',
5038         'TJ': 'Tajikistan',
5039         'TZ': 'Tanzania, United Republic of',
5040         'TH': 'Thailand',
5041         'TL': 'Timor-Leste',
5042         'TG': 'Togo',
5043         'TK': 'Tokelau',
5044         'TO': 'Tonga',
5045         'TT': 'Trinidad and Tobago',
5046         'TN': 'Tunisia',
5047         'TR': 'Turkey',
5048         'TM': 'Turkmenistan',
5049         'TC': 'Turks and Caicos Islands',
5050         'TV': 'Tuvalu',
5051         'UG': 'Uganda',
5052         'UA': 'Ukraine',
5053         'AE': 'United Arab Emirates',
5054         'GB': 'United Kingdom',
5055         'US': 'United States',
5056         'UM': 'United States Minor Outlying Islands',
5057         'UY': 'Uruguay',
5058         'UZ': 'Uzbekistan',
5059         'VU': 'Vanuatu',
5060         'VE': 'Venezuela, Bolivarian Republic of',
5061         'VN': 'Viet Nam',
5062         'VG': 'Virgin Islands, British',
5063         'VI': 'Virgin Islands, U.S.',
5064         'WF': 'Wallis and Futuna',
5065         'EH': 'Western Sahara',
5066         'YE': 'Yemen',
5067         'ZM': 'Zambia',
5068         'ZW': 'Zimbabwe',
5069     }
5070
5071     @classmethod
5072     def short2full(cls, code):
5073         """Convert an ISO 3166-2 country code to the corresponding full name"""
5074         return cls._country_map.get(code.upper())
5075
5076
5077 class GeoUtils(object):
5078     # Major IPv4 address blocks per country
5079     _country_ip_map = {
5080         'AD': '46.172.224.0/19',
5081         'AE': '94.200.0.0/13',
5082         'AF': '149.54.0.0/17',
5083         'AG': '209.59.64.0/18',
5084         'AI': '204.14.248.0/21',
5085         'AL': '46.99.0.0/16',
5086         'AM': '46.70.0.0/15',
5087         'AO': '105.168.0.0/13',
5088         'AP': '182.50.184.0/21',
5089         'AQ': '23.154.160.0/24',
5090         'AR': '181.0.0.0/12',
5091         'AS': '202.70.112.0/20',
5092         'AT': '77.116.0.0/14',
5093         'AU': '1.128.0.0/11',
5094         'AW': '181.41.0.0/18',
5095         'AX': '185.217.4.0/22',
5096         'AZ': '5.197.0.0/16',
5097         'BA': '31.176.128.0/17',
5098         'BB': '65.48.128.0/17',
5099         'BD': '114.130.0.0/16',
5100         'BE': '57.0.0.0/8',
5101         'BF': '102.178.0.0/15',
5102         'BG': '95.42.0.0/15',
5103         'BH': '37.131.0.0/17',
5104         'BI': '154.117.192.0/18',
5105         'BJ': '137.255.0.0/16',
5106         'BL': '185.212.72.0/23',
5107         'BM': '196.12.64.0/18',
5108         'BN': '156.31.0.0/16',
5109         'BO': '161.56.0.0/16',
5110         'BQ': '161.0.80.0/20',
5111         'BR': '191.128.0.0/12',
5112         'BS': '24.51.64.0/18',
5113         'BT': '119.2.96.0/19',
5114         'BW': '168.167.0.0/16',
5115         'BY': '178.120.0.0/13',
5116         'BZ': '179.42.192.0/18',
5117         'CA': '99.224.0.0/11',
5118         'CD': '41.243.0.0/16',
5119         'CF': '197.242.176.0/21',
5120         'CG': '160.113.0.0/16',
5121         'CH': '85.0.0.0/13',
5122         'CI': '102.136.0.0/14',
5123         'CK': '202.65.32.0/19',
5124         'CL': '152.172.0.0/14',
5125         'CM': '102.244.0.0/14',
5126         'CN': '36.128.0.0/10',
5127         'CO': '181.240.0.0/12',
5128         'CR': '201.192.0.0/12',
5129         'CU': '152.206.0.0/15',
5130         'CV': '165.90.96.0/19',
5131         'CW': '190.88.128.0/17',
5132         'CY': '31.153.0.0/16',
5133         'CZ': '88.100.0.0/14',
5134         'DE': '53.0.0.0/8',
5135         'DJ': '197.241.0.0/17',
5136         'DK': '87.48.0.0/12',
5137         'DM': '192.243.48.0/20',
5138         'DO': '152.166.0.0/15',
5139         'DZ': '41.96.0.0/12',
5140         'EC': '186.68.0.0/15',
5141         'EE': '90.190.0.0/15',
5142         'EG': '156.160.0.0/11',
5143         'ER': '196.200.96.0/20',
5144         'ES': '88.0.0.0/11',
5145         'ET': '196.188.0.0/14',
5146         'EU': '2.16.0.0/13',
5147         'FI': '91.152.0.0/13',
5148         'FJ': '144.120.0.0/16',
5149         'FK': '80.73.208.0/21',
5150         'FM': '119.252.112.0/20',
5151         'FO': '88.85.32.0/19',
5152         'FR': '90.0.0.0/9',
5153         'GA': '41.158.0.0/15',
5154         'GB': '25.0.0.0/8',
5155         'GD': '74.122.88.0/21',
5156         'GE': '31.146.0.0/16',
5157         'GF': '161.22.64.0/18',
5158         'GG': '62.68.160.0/19',
5159         'GH': '154.160.0.0/12',
5160         'GI': '95.164.0.0/16',
5161         'GL': '88.83.0.0/19',
5162         'GM': '160.182.0.0/15',
5163         'GN': '197.149.192.0/18',
5164         'GP': '104.250.0.0/19',
5165         'GQ': '105.235.224.0/20',
5166         'GR': '94.64.0.0/13',
5167         'GT': '168.234.0.0/16',
5168         'GU': '168.123.0.0/16',
5169         'GW': '197.214.80.0/20',
5170         'GY': '181.41.64.0/18',
5171         'HK': '113.252.0.0/14',
5172         'HN': '181.210.0.0/16',
5173         'HR': '93.136.0.0/13',
5174         'HT': '148.102.128.0/17',
5175         'HU': '84.0.0.0/14',
5176         'ID': '39.192.0.0/10',
5177         'IE': '87.32.0.0/12',
5178         'IL': '79.176.0.0/13',
5179         'IM': '5.62.80.0/20',
5180         'IN': '117.192.0.0/10',
5181         'IO': '203.83.48.0/21',
5182         'IQ': '37.236.0.0/14',
5183         'IR': '2.176.0.0/12',
5184         'IS': '82.221.0.0/16',
5185         'IT': '79.0.0.0/10',
5186         'JE': '87.244.64.0/18',
5187         'JM': '72.27.0.0/17',
5188         'JO': '176.29.0.0/16',
5189         'JP': '133.0.0.0/8',
5190         'KE': '105.48.0.0/12',
5191         'KG': '158.181.128.0/17',
5192         'KH': '36.37.128.0/17',
5193         'KI': '103.25.140.0/22',
5194         'KM': '197.255.224.0/20',
5195         'KN': '198.167.192.0/19',
5196         'KP': '175.45.176.0/22',
5197         'KR': '175.192.0.0/10',
5198         'KW': '37.36.0.0/14',
5199         'KY': '64.96.0.0/15',
5200         'KZ': '2.72.0.0/13',
5201         'LA': '115.84.64.0/18',
5202         'LB': '178.135.0.0/16',
5203         'LC': '24.92.144.0/20',
5204         'LI': '82.117.0.0/19',
5205         'LK': '112.134.0.0/15',
5206         'LR': '102.183.0.0/16',
5207         'LS': '129.232.0.0/17',
5208         'LT': '78.56.0.0/13',
5209         'LU': '188.42.0.0/16',
5210         'LV': '46.109.0.0/16',
5211         'LY': '41.252.0.0/14',
5212         'MA': '105.128.0.0/11',
5213         'MC': '88.209.64.0/18',
5214         'MD': '37.246.0.0/16',
5215         'ME': '178.175.0.0/17',
5216         'MF': '74.112.232.0/21',
5217         'MG': '154.126.0.0/17',
5218         'MH': '117.103.88.0/21',
5219         'MK': '77.28.0.0/15',
5220         'ML': '154.118.128.0/18',
5221         'MM': '37.111.0.0/17',
5222         'MN': '49.0.128.0/17',
5223         'MO': '60.246.0.0/16',
5224         'MP': '202.88.64.0/20',
5225         'MQ': '109.203.224.0/19',
5226         'MR': '41.188.64.0/18',
5227         'MS': '208.90.112.0/22',
5228         'MT': '46.11.0.0/16',
5229         'MU': '105.16.0.0/12',
5230         'MV': '27.114.128.0/18',
5231         'MW': '102.70.0.0/15',
5232         'MX': '187.192.0.0/11',
5233         'MY': '175.136.0.0/13',
5234         'MZ': '197.218.0.0/15',
5235         'NA': '41.182.0.0/16',
5236         'NC': '101.101.0.0/18',
5237         'NE': '197.214.0.0/18',
5238         'NF': '203.17.240.0/22',
5239         'NG': '105.112.0.0/12',
5240         'NI': '186.76.0.0/15',
5241         'NL': '145.96.0.0/11',
5242         'NO': '84.208.0.0/13',
5243         'NP': '36.252.0.0/15',
5244         'NR': '203.98.224.0/19',
5245         'NU': '49.156.48.0/22',
5246         'NZ': '49.224.0.0/14',
5247         'OM': '5.36.0.0/15',
5248         'PA': '186.72.0.0/15',
5249         'PE': '186.160.0.0/14',
5250         'PF': '123.50.64.0/18',
5251         'PG': '124.240.192.0/19',
5252         'PH': '49.144.0.0/13',
5253         'PK': '39.32.0.0/11',
5254         'PL': '83.0.0.0/11',
5255         'PM': '70.36.0.0/20',
5256         'PR': '66.50.0.0/16',
5257         'PS': '188.161.0.0/16',
5258         'PT': '85.240.0.0/13',
5259         'PW': '202.124.224.0/20',
5260         'PY': '181.120.0.0/14',
5261         'QA': '37.210.0.0/15',
5262         'RE': '102.35.0.0/16',
5263         'RO': '79.112.0.0/13',
5264         'RS': '93.86.0.0/15',
5265         'RU': '5.136.0.0/13',
5266         'RW': '41.186.0.0/16',
5267         'SA': '188.48.0.0/13',
5268         'SB': '202.1.160.0/19',
5269         'SC': '154.192.0.0/11',
5270         'SD': '102.120.0.0/13',
5271         'SE': '78.64.0.0/12',
5272         'SG': '8.128.0.0/10',
5273         'SI': '188.196.0.0/14',
5274         'SK': '78.98.0.0/15',
5275         'SL': '102.143.0.0/17',
5276         'SM': '89.186.32.0/19',
5277         'SN': '41.82.0.0/15',
5278         'SO': '154.115.192.0/18',
5279         'SR': '186.179.128.0/17',
5280         'SS': '105.235.208.0/21',
5281         'ST': '197.159.160.0/19',
5282         'SV': '168.243.0.0/16',
5283         'SX': '190.102.0.0/20',
5284         'SY': '5.0.0.0/16',
5285         'SZ': '41.84.224.0/19',
5286         'TC': '65.255.48.0/20',
5287         'TD': '154.68.128.0/19',
5288         'TG': '196.168.0.0/14',
5289         'TH': '171.96.0.0/13',
5290         'TJ': '85.9.128.0/18',
5291         'TK': '27.96.24.0/21',
5292         'TL': '180.189.160.0/20',
5293         'TM': '95.85.96.0/19',
5294         'TN': '197.0.0.0/11',
5295         'TO': '175.176.144.0/21',
5296         'TR': '78.160.0.0/11',
5297         'TT': '186.44.0.0/15',
5298         'TV': '202.2.96.0/19',
5299         'TW': '120.96.0.0/11',
5300         'TZ': '156.156.0.0/14',
5301         'UA': '37.52.0.0/14',
5302         'UG': '102.80.0.0/13',
5303         'US': '6.0.0.0/8',
5304         'UY': '167.56.0.0/13',
5305         'UZ': '84.54.64.0/18',
5306         'VA': '212.77.0.0/19',
5307         'VC': '207.191.240.0/21',
5308         'VE': '186.88.0.0/13',
5309         'VG': '66.81.192.0/20',
5310         'VI': '146.226.0.0/16',
5311         'VN': '14.160.0.0/11',
5312         'VU': '202.80.32.0/20',
5313         'WF': '117.20.32.0/21',
5314         'WS': '202.4.32.0/19',
5315         'YE': '134.35.0.0/16',
5316         'YT': '41.242.116.0/22',
5317         'ZA': '41.0.0.0/11',
5318         'ZM': '102.144.0.0/13',
5319         'ZW': '102.177.192.0/18',
5320     }
5321
5322     @classmethod
5323     def random_ipv4(cls, code_or_block):
5324         if len(code_or_block) == 2:
5325             block = cls._country_ip_map.get(code_or_block.upper())
5326             if not block:
5327                 return None
5328         else:
5329             block = code_or_block
5330         addr, preflen = block.split('/')
5331         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5332         addr_max = addr_min | (0xffffffff >> int(preflen))
5333         return compat_str(socket.inet_ntoa(
5334             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5335
5336
5337 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5338     def __init__(self, proxies=None):
5339         # Set default handlers
5340         for type in ('http', 'https'):
5341             setattr(self, '%s_open' % type,
5342                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5343                         meth(r, proxy, type))
5344         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5345
5346     def proxy_open(self, req, proxy, type):
5347         req_proxy = req.headers.get('Ytdl-request-proxy')
5348         if req_proxy is not None:
5349             proxy = req_proxy
5350             del req.headers['Ytdl-request-proxy']
5351
5352         if proxy == '__noproxy__':
5353             return None  # No Proxy
5354         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5355             req.add_header('Ytdl-socks-proxy', proxy)
5356             # youtube-dl's http/https handlers do wrapping the socket with socks
5357             return None
5358         return compat_urllib_request.ProxyHandler.proxy_open(
5359             self, req, proxy, type)
5360
5361
5362 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5363 # released into Public Domain
5364 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5365
5366 def long_to_bytes(n, blocksize=0):
5367     """long_to_bytes(n:long, blocksize:int) : string
5368     Convert a long integer to a byte string.
5369
5370     If optional blocksize is given and greater than zero, pad the front of the
5371     byte string with binary zeros so that the length is a multiple of
5372     blocksize.
5373     """
5374     # after much testing, this algorithm was deemed to be the fastest
5375     s = b''
5376     n = int(n)
5377     while n > 0:
5378         s = compat_struct_pack('>I', n & 0xffffffff) + s
5379         n = n >> 32
5380     # strip off leading zeros
5381     for i in range(len(s)):
5382         if s[i] != b'\000'[0]:
5383             break
5384     else:
5385         # only happens when n == 0
5386         s = b'\000'
5387         i = 0
5388     s = s[i:]
5389     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5390     # de-padding being done above, but sigh...
5391     if blocksize > 0 and len(s) % blocksize:
5392         s = (blocksize - len(s) % blocksize) * b'\000' + s
5393     return s
5394
5395
5396 def bytes_to_long(s):
5397     """bytes_to_long(string) : long
5398     Convert a byte string to a long integer.
5399
5400     This is (essentially) the inverse of long_to_bytes().
5401     """
5402     acc = 0
5403     length = len(s)
5404     if length % 4:
5405         extra = (4 - length % 4)
5406         s = b'\000' * extra + s
5407         length = length + extra
5408     for i in range(0, length, 4):
5409         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5410     return acc
5411
5412
5413 def ohdave_rsa_encrypt(data, exponent, modulus):
5414     '''
5415     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5416
5417     Input:
5418         data: data to encrypt, bytes-like object
5419         exponent, modulus: parameter e and N of RSA algorithm, both integer
5420     Output: hex string of encrypted data
5421
5422     Limitation: supports one block encryption only
5423     '''
5424
5425     payload = int(binascii.hexlify(data[::-1]), 16)
5426     encrypted = pow(payload, exponent, modulus)
5427     return '%x' % encrypted
5428
5429
5430 def pkcs1pad(data, length):
5431     """
5432     Padding input data with PKCS#1 scheme
5433
5434     @param {int[]} data        input data
5435     @param {int}   length      target length
5436     @returns {int[]}           padded data
5437     """
5438     if len(data) > length - 11:
5439         raise ValueError('Input data too long for PKCS#1 padding')
5440
5441     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5442     return [0, 2] + pseudo_random + [0] + data
5443
5444
5445 def encode_base_n(num, n, table=None):
5446     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5447     if not table:
5448         table = FULL_TABLE[:n]
5449
5450     if n > len(table):
5451         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5452
5453     if num == 0:
5454         return table[0]
5455
5456     ret = ''
5457     while num:
5458         ret = table[num % n] + ret
5459         num = num // n
5460     return ret
5461
5462
5463 def decode_packed_codes(code):
5464     mobj = re.search(PACKED_CODES_RE, code)
5465     obfucasted_code, base, count, symbols = mobj.groups()
5466     base = int(base)
5467     count = int(count)
5468     symbols = symbols.split('|')
5469     symbol_table = {}
5470
5471     while count:
5472         count -= 1
5473         base_n_count = encode_base_n(count, base)
5474         symbol_table[base_n_count] = symbols[count] or base_n_count
5475
5476     return re.sub(
5477         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5478         obfucasted_code)
5479
5480
5481 def caesar(s, alphabet, shift):
5482     if shift == 0:
5483         return s
5484     l = len(alphabet)
5485     return ''.join(
5486         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5487         for c in s)
5488
5489
5490 def rot47(s):
5491     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5492
5493
5494 def parse_m3u8_attributes(attrib):
5495     info = {}
5496     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5497         if val.startswith('"'):
5498             val = val[1:-1]
5499         info[key] = val
5500     return info
5501
5502
5503 def urshift(val, n):
5504     return val >> n if val >= 0 else (val + 0x100000000) >> n
5505
5506
5507 # Based on png2str() written by @gdkchan and improved by @yokrysty
5508 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5509 def decode_png(png_data):
5510     # Reference: https://www.w3.org/TR/PNG/
5511     header = png_data[8:]
5512
5513     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5514         raise IOError('Not a valid PNG file.')
5515
5516     int_map = {1: '>B', 2: '>H', 4: '>I'}
5517     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5518
5519     chunks = []
5520
5521     while header:
5522         length = unpack_integer(header[:4])
5523         header = header[4:]
5524
5525         chunk_type = header[:4]
5526         header = header[4:]
5527
5528         chunk_data = header[:length]
5529         header = header[length:]
5530
5531         header = header[4:]  # Skip CRC
5532
5533         chunks.append({
5534             'type': chunk_type,
5535             'length': length,
5536             'data': chunk_data
5537         })
5538
5539     ihdr = chunks[0]['data']
5540
5541     width = unpack_integer(ihdr[:4])
5542     height = unpack_integer(ihdr[4:8])
5543
5544     idat = b''
5545
5546     for chunk in chunks:
5547         if chunk['type'] == b'IDAT':
5548             idat += chunk['data']
5549
5550     if not idat:
5551         raise IOError('Unable to read PNG data.')
5552
5553     decompressed_data = bytearray(zlib.decompress(idat))
5554
5555     stride = width * 3
5556     pixels = []
5557
5558     def _get_pixel(idx):
5559         x = idx % stride
5560         y = idx // stride
5561         return pixels[y][x]
5562
5563     for y in range(height):
5564         basePos = y * (1 + stride)
5565         filter_type = decompressed_data[basePos]
5566
5567         current_row = []
5568
5569         pixels.append(current_row)
5570
5571         for x in range(stride):
5572             color = decompressed_data[1 + basePos + x]
5573             basex = y * stride + x
5574             left = 0
5575             up = 0
5576
5577             if x > 2:
5578                 left = _get_pixel(basex - 3)
5579             if y > 0:
5580                 up = _get_pixel(basex - stride)
5581
5582             if filter_type == 1:  # Sub
5583                 color = (color + left) & 0xff
5584             elif filter_type == 2:  # Up
5585                 color = (color + up) & 0xff
5586             elif filter_type == 3:  # Average
5587                 color = (color + ((left + up) >> 1)) & 0xff
5588             elif filter_type == 4:  # Paeth
5589                 a = left
5590                 b = up
5591                 c = 0
5592
5593                 if x > 2 and y > 0:
5594                     c = _get_pixel(basex - stride - 3)
5595
5596                 p = a + b - c
5597
5598                 pa = abs(p - a)
5599                 pb = abs(p - b)
5600                 pc = abs(p - c)
5601
5602                 if pa <= pb and pa <= pc:
5603                     color = (color + a) & 0xff
5604                 elif pb <= pc:
5605                     color = (color + b) & 0xff
5606                 else:
5607                     color = (color + c) & 0xff
5608
5609             current_row.append(color)
5610
5611     return width, height, pixels
5612
5613
5614 def write_xattr(path, key, value):
5615     # This mess below finds the best xattr tool for the job
5616     try:
5617         # try the pyxattr module...
5618         import xattr
5619
5620         if hasattr(xattr, 'set'):  # pyxattr
5621             # Unicode arguments are not supported in python-pyxattr until
5622             # version 0.5.0
5623             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5624             pyxattr_required_version = '0.5.0'
5625             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5626                 # TODO: fallback to CLI tools
5627                 raise XAttrUnavailableError(
5628                     'python-pyxattr is detected but is too old. '
5629                     'youtube-dl requires %s or above while your version is %s. '
5630                     'Falling back to other xattr implementations' % (
5631                         pyxattr_required_version, xattr.__version__))
5632
5633             setxattr = xattr.set
5634         else:  # xattr
5635             setxattr = xattr.setxattr
5636
5637         try:
5638             setxattr(path, key, value)
5639         except EnvironmentError as e:
5640             raise XAttrMetadataError(e.errno, e.strerror)
5641
5642     except ImportError:
5643         if compat_os_name == 'nt':
5644             # Write xattrs to NTFS Alternate Data Streams:
5645             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5646             assert ':' not in key
5647             assert os.path.exists(path)
5648
5649             ads_fn = path + ':' + key
5650             try:
5651                 with open(ads_fn, 'wb') as f:
5652                     f.write(value)
5653             except EnvironmentError as e:
5654                 raise XAttrMetadataError(e.errno, e.strerror)
5655         else:
5656             user_has_setfattr = check_executable('setfattr', ['--version'])
5657             user_has_xattr = check_executable('xattr', ['-h'])
5658
5659             if user_has_setfattr or user_has_xattr:
5660
5661                 value = value.decode('utf-8')
5662                 if user_has_setfattr:
5663                     executable = 'setfattr'
5664                     opts = ['-n', key, '-v', value]
5665                 elif user_has_xattr:
5666                     executable = 'xattr'
5667                     opts = ['-w', key, value]
5668
5669                 cmd = ([encodeFilename(executable, True)]
5670                        + [encodeArgument(o) for o in opts]
5671                        + [encodeFilename(path, True)])
5672
5673                 try:
5674                     p = subprocess.Popen(
5675                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5676                 except EnvironmentError as e:
5677                     raise XAttrMetadataError(e.errno, e.strerror)
5678                 stdout, stderr = p.communicate()
5679                 stderr = stderr.decode('utf-8', 'replace')
5680                 if p.returncode != 0:
5681                     raise XAttrMetadataError(p.returncode, stderr)
5682
5683             else:
5684                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5685                 if sys.platform.startswith('linux'):
5686                     raise XAttrUnavailableError(
5687                         "Couldn't find a tool to set the xattrs. "
5688                         "Install either the python 'pyxattr' or 'xattr' "
5689                         "modules, or the GNU 'attr' package "
5690                         "(which contains the 'setfattr' tool).")
5691                 else:
5692                     raise XAttrUnavailableError(
5693                         "Couldn't find a tool to set the xattrs. "
5694                         "Install either the python 'xattr' module, "
5695                         "or the 'xattr' binary.")
5696
5697
5698 def random_birthday(year_field, month_field, day_field):
5699     start_date = datetime.date(1950, 1, 1)
5700     end_date = datetime.date(1995, 12, 31)
5701     offset = random.randint(0, (end_date - start_date).days)
5702     random_date = start_date + datetime.timedelta(offset)
5703     return {
5704         year_field: str(random_date.year),
5705         month_field: str(random_date.month),
5706         day_field: str(random_date.day),
5707     }