4 from __future__
import unicode_literals
36 import xml
.etree
.ElementTree
40 compat_HTMLParseError
,
45 compat_ctypes_WINFUNCTYPE
,
46 compat_etree_fromstring
,
49 compat_html_entities_html5
,
61 compat_urllib_parse_urlencode
,
62 compat_urllib_parse_urlparse
,
63 compat_urllib_parse_unquote_plus
,
64 compat_urllib_request
,
def register_socks_protocols():
    """'Register' the SOCKS URL schemes with the urlparse machinery.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose protocols are not listed
    in urlparse.uses_netloc are not handled correctly, so every socks
    scheme is appended to that registry when missing.
    """
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(proto)
# Type object of a compiled regular expression, used for isinstance()
# checks elsewhere (the concrete class is not publicly exposed by `re`).
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
88 def random_user_agent():
89 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1668 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1672 'User-Agent': random_user_agent(),
1673 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1674 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1675 'Accept-Encoding': 'gzip, deflate',
1676 'Accept-Language': 'en-us,en;q=0.5',
1681 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Unique sentinel distinguishing "no default supplied" from an explicit
# default of None (used by the xpath_* helpers below).
NO_DEFAULT = object()
# English month names in calendar order (index = month number - 1)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1692 'en': ENGLISH_MONTH_NAMES
,
1694 'janvier', 'fƩvrier', 'mars', 'avril', 'mai', 'juin',
1695 'juillet', 'aoƻt', 'septembre', 'octobre', 'novembre', 'dƩcembre'],
1698 KNOWN_EXTENSIONS
= (
1699 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1700 'flv', 'f4v', 'f4a', 'f4b',
1701 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1702 'mkv', 'mka', 'mk3d',
1705 'asf', 'wmv', 'wma',
1711 'f4f', 'f4m', 'm3u8', 'smil')
1713 # needed for sanitizing filenames in restricted mode
1714 ACCENT_CHARS
= dict(zip('ĆĆĆĆĆĆ
ĆĆĆĆĆĆĆĆĆĆĆĆĆĆĆĆĆÅĆÅĆĆĆĆÅ°ĆĆĆĆ Ć”Ć¢Ć£Ć¤Ć„Ć¦Ć§ĆØĆ©ĆŖƫƬĆĆ®ĆÆĆ°Ć±Ć²Ć³Ć“ĆµĆ¶ÅĆøÅĆ¹ĆŗĆ»Ć¼Å±Ć½Ć¾Ćæ',
1715 itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1716 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1739 '%Y/%m/%d %H:%M:%S',
1741 '%Y-%m-%d %H:%M:%S',
1742 '%Y-%m-%d %H:%M:%S.%f',
1745 '%Y-%m-%dT%H:%M:%SZ',
1746 '%Y-%m-%dT%H:%M:%S.%fZ',
1747 '%Y-%m-%dT%H:%M:%S.%f0Z',
1748 '%Y-%m-%dT%H:%M:%S',
1749 '%Y-%m-%dT%H:%M:%S.%f',
1751 '%b %d %Y at %H:%M',
1752 '%b %d %Y at %H:%M:%S',
1753 '%B %d %Y at %H:%M',
1754 '%B %d %Y at %H:%M:%S',
1757 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1758 DATE_FORMATS_DAY_FIRST
.extend([
1764 '%d/%m/%Y %H:%M:%S',
1767 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1768 DATE_FORMATS_MONTH_FIRST
.extend([
1773 '%m/%d/%Y %H:%M:%S',
# Matches the argument list of "packed" JavaScript payloads
# (eval(function(p,a,c,k,e,d){...}('...',N,N,'...'.split('|')) style):
# captures the packed code, the radix, the symbol count and the
# '|'-joined symbol dictionary.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the payload is
# captured in the named group "json_ld". (?is) = case-insensitive +
# DOTALL so the payload may span multiple lines; \1 backreferences the
# (optional) quote character around the type value.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1780 def preferredencoding():
1781 """Get preferred encoding.
1783 Returns the best encoding scheme for the system, based on
1784 locale.getpreferredencoding() and some further tweaks.
1787 pref = locale.getpreferredencoding()
1795 def write_json_file(obj, fn):
1796 """ Encode obj as JSON and write it to fn, atomically if possible """
1798 fn = encodeFilename(fn)
1799 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1800 encoding = get_filesystem_encoding()
1801 # os.path.basename returns a bytes object, but NamedTemporaryFile
1802 # will fail if the filename contains non ascii characters unless we
1803 # use a unicode object
1804 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1805 # the same for os.path.dirname
1806 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1808 path_basename = os.path.basename
1809 path_dirname = os.path.dirname
1813 'prefix
': path_basename(fn) + '.',
1814 'dir': path_dirname(fn),
1818 # In Python 2.x, json.dump expects a bytestream.
1819 # In Python 3.x, it writes to a character stream
1820 if sys.version_info < (3, 0):
1825 'encoding
': 'utf
-8',
1828 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1833 if sys.platform == 'win32
':
1834 # Need to remove existing file on Windows, else os.rename raises
1835 # WindowsError or FileExistsError.
1840 os.rename(tf.name, fn)
1849 if sys.version_info >= (2, 7):
1850 def find_xpath_attr(node, xpath, key, val=None):
1851 """ Find the xpath xpath[@key=val] """
1852 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1853 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1854 return node.find(expr)
1856 def find_xpath_attr(node, xpath, key, val=None):
1857 for f in node.findall(compat_xpath(xpath)):
1858 if key not in f.attrib:
1860 if val is None or f.attrib.get(key) == val:
1864 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1865 # the namespace parameter
1868 def xpath_with_ns(path
, ns_map
):
1869 components
= [c
.split(':') for c
in path
.split('/')]
1871 for c
in components
:
1873 replaced
.append(c
[0])
1876 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1877 return '/'.join(replaced
)
1880 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1881 def _find_xpath(xpath
):
1882 return node
.find(compat_xpath(xpath
))
1884 if isinstance(xpath
, (str, compat_str
)):
1885 n
= _find_xpath(xpath
)
1893 if default
is not NO_DEFAULT
:
1896 name
= xpath
if name
is None else name
1897 raise ExtractorError('Could not find XML element %s' % name
)
1903 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1904 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1905 if n
is None or n
== default
:
1908 if default
is not NO_DEFAULT
:
1911 name
= xpath
if name
is None else name
1912 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1918 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1919 n
= find_xpath_attr(node
, xpath
, key
)
1921 if default
is not NO_DEFAULT
:
1924 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1925 raise ExtractorError('Could not find XML attribute %s' % name
)
1928 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given ID attribute in the
    passed HTML document, or None when no such tag exists."""
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the
    passed HTML document, or None when nothing matches."""
    matches = get_elements_by_class(class_name, html)
    if matches:
        return matches[0]
    return None
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose `attribute` equals `value`
    in the passed HTML document, or None when nothing matches."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if matches:
        return matches[0]
    return None
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed
    HTML document as a list."""
    # Match class_name as a whole word anywhere inside the class attribute
    # value; escaping is disabled because the pattern itself is a regex.
    class_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_re, html, escape_value=False)
1954 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1955 """Return the content of the tag with the specified attribute in the passed HTML document"""
1957 value = re.escape(value) if escape_value else value
1960 for m in re.finditer(r'''(?xs)
1962 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1964 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1968 ''' % (re.escape(attribute), value), html):
1969 res = m.group('content
')
1971 if res.startswith('"') or res.startswith("'"):
1974 retlist.append(unescapeHTML(res))
1979 class HTMLAttributeParser(compat_HTMLParser):
1980 """Trivial HTML parser to gather the attributes for a single element"""
1983 compat_HTMLParser.__init__(self)
1985 def handle_starttag(self, tag, attrs):
1986 self.attrs = dict(attrs)
1989 def extract_attributes(html_element):
1990 """Given a string for an HTML element such as
1992 a="foo" B="bar" c="&98;az" d=boz
1993 empty= noval entity="&"
1996 Decode and return a dictionary of attributes.
1998 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
1999 'empty
': '', 'noval
': None, 'entity
': '&',
2000 'sq
': '"', 'dq': '\''
2002 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2003 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2005 parser = HTMLAttributeParser()
2007 parser.feed(html_element)
2009 # Older Python may throw HTMLParseError in case of malformed HTML
2010 except compat_HTMLParseError:
2015 def clean_html(html):
2016 """Clean an HTML snippet into a readable string"""
2018 if html is None: # Convenience for sanitizing descriptions etc.
2022 html = html.replace('\n', ' ')
2023 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2024 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2026 html = re.sub('<.*?>', '', html)
2027 # Replace html entities
2028 html = unescapeHTML(html)
2032 def sanitize_open(filename, open_mode):
2033 """Try to open the given filename, and slightly tweak it if this fails.
2035 Attempts to open the given filename. If this fails, it tries to change
2036 the filename slightly, step by step, until it's either able to open it
2037 or it fails and raises a final exception, like the standard open()
2040 It returns the tuple (stream, definitive_file_name).
2044 if sys.platform == 'win32':
2046 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2047 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2048 stream = open(encodeFilename(filename), open_mode)
2049 return (stream, filename)
2050 except (IOError, OSError) as err:
2051 if err.errno in (errno.EACCES,):
2054 # In case of error, try to remove win32 forbidden chars
2055 alt_filename = sanitize_path(filename)
2056 if alt_filename == filename:
2059 # An exception here should be caught in the caller
2060 stream = open(encodeFilename(alt_filename), open_mode)
2061 return (stream, alt_filename)
2064 def timeconvert(timestr):
2065 """Convert RFC 2822 defined time string into system timestamp"""
2067 timetuple = email.utils.parsedate_tz(timestr)
2068 if timetuple is not None:
2069 timestamp = email.utils.mktime_tz(timetuple)
2073 def sanitize_filename(s, restricted=False, is_id=False):
2074 """Sanitizes a string so it could be used as part of a filename.
2075 If restricted is set, use a stricter subset of allowed characters.
2076 Set is_id if this is not an arbitrary string, but an ID that should be kept
2079 def replace_insane(char):
2080 if restricted and char in ACCENT_CHARS:
2081 return ACCENT_CHARS[char]
2082 if char == '?' or ord(char) < 32 or ord(char) == 127:
2085 return '' if restricted else '\''
2087 return '_
-' if restricted else ' -'
2088 elif char in '\\/|
*<>':
2090 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2092 if restricted
and ord(char
) > 127:
2097 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2098 result
= ''.join(map(replace_insane
, s
))
2100 while '__' in result
:
2101 result
= result
.replace('__', '_')
2102 result
= result
.strip('_')
2103 # Common case of "Foreign band name - English song title"
2104 if restricted
and result
.startswith('-_'):
2106 if result
.startswith('-'):
2107 result
= '_' + result
[len('-'):]
2108 result
= result
.lstrip('.')
2114 def sanitize_path(s
):
2115 """Sanitizes and normalizes path on Windows"""
2116 if sys
.platform
!= 'win32':
2118 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2119 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2120 drive_or_unc
, _
= os
.path
.splitunc(s
)
2121 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2125 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2126 for path_part
in norm_path
]
2128 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2129 return os
.path
.join(*sanitized_path
)
2132 def sanitize_url(url
):
2133 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2134 # the number of unwanted failures due to missing protocol
2135 if url
.startswith('//'):
2136 return 'http:%s' % url
2137 # Fix some common typos seen so far
2139 # https://github.com/ytdl-org/youtube-dl/issues/15649
2140 (r
'^httpss://', r
'https://'),
2141 # https://bx1.be/lives/direct-tv/
2142 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2144 for mistake
, fixup
in COMMON_TYPOS
:
2145 if re
.match(mistake
, url
):
2146 return re
.sub(mistake
, fixup
, url
)
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request after running the URL through
    sanitize_url() (scheme fix-ups for protocol-less/typo'd URLs)."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2155 """Expand shell variables and ~"""
2156 return os
.path
.expandvars(compat_expanduser(s
))
2159 def orderedSet(iterable
):
2160 """ Remove all duplicates from the input iterable """
2168 def _htmlentity_transform(entity_with_semicolon
):
2169 """Transforms an HTML entity to a character."""
2170 entity
= entity_with_semicolon
[:-1]
2172 # Known non-numeric HTML entity
2173 if entity
in compat_html_entities
.name2codepoint
:
2174 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2176 # TODO: HTML5 allows entities without a semicolon. For example,
2177 # 'Éric' should be decoded as 'Ćric'.
2178 if entity_with_semicolon
in compat_html_entities_html5
:
2179 return compat_html_entities_html5
[entity_with_semicolon
]
2181 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2182 if mobj
is not None:
2183 numstr
= mobj
.group(1)
2184 if numstr
.startswith('x'):
2186 numstr
= '0%s' % numstr
2189 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2191 return compat_chr(int(numstr
, base
))
2195 # Unknown entity in name, return its literal representation
2196 return '&%s;' % entity
2199 def unescapeHTML(s
):
2202 assert type(s
) == compat_str
2205 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2208 def get_subprocess_encoding():
2209 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2210 # For subprocess calls, encode with locale encoding
2211 # Refer to http://stackoverflow.com/a/9951851/35070
2212 encoding
= preferredencoding()
2214 encoding
= sys
.getfilesystemencoding()
2215 if encoding
is None:
2220 def encodeFilename(s
, for_subprocess
=False):
2222 @param s The name of the file
2225 assert type(s
) == compat_str
2227 # Python 3 has a Unicode API
2228 if sys
.version_info
>= (3, 0):
2231 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2232 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2233 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2234 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2237 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2238 if sys
.platform
.startswith('java'):
2241 return s
.encode(get_subprocess_encoding(), 'ignore')
2244 def decodeFilename(b
, for_subprocess
=False):
2246 if sys
.version_info
>= (3, 0):
2249 if not isinstance(b
, bytes):
2252 return b
.decode(get_subprocess_encoding(), 'ignore')
2255 def encodeArgument(s
):
2256 if not isinstance(s
, compat_str
):
2257 # Legacy code that uses byte strings
2258 # Uncomment the following line after fixing all post processors
2259 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2260 s
= s
.decode('ascii')
2261 return encodeFilename(s
, True)
def decodeArgument(b):
    """Decode a subprocess argument byte string; see decodeFilename."""
    return decodeFilename(b, for_subprocess=True)
2268 def decodeOption(optval
):
2271 if isinstance(optval
, bytes):
2272 optval
= optval
.decode(preferredencoding())
2274 assert isinstance(optval
, compat_str
)
2278 def formatSeconds(secs
):
2280 return '%d:%02d:%02d' % (secs
// 3600, (secs
% 3600) // 60, secs
% 60)
2282 return '%d:%02d' % (secs
// 60, secs
% 60)
2287 def make_HTTPS_handler(params
, **kwargs
):
2288 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2289 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2290 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2291 if opts_no_check_certificate
:
2292 context
.check_hostname
= False
2293 context
.verify_mode
= ssl
.CERT_NONE
2295 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2298 # (create_default_context present but HTTPSHandler has no context=)
2301 if sys
.version_info
< (3, 2):
2302 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2303 else: # Python < 3.4
2304 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2305 context
.verify_mode
= (ssl
.CERT_NONE
2306 if opts_no_check_certificate
2307 else ssl
.CERT_REQUIRED
)
2308 context
.set_default_verify_paths()
2309 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2312 def bug_reports_message():
2313 if ytdl_is_updateable():
2314 update_cmd
= 'type youtube-dl -U to update'
2316 update_cmd
= 'see https://yt-dl.org/update on how to update'
2317 msg
= '; please report this issue on https://yt-dl.org/bug .'
2318 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2319 msg
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
2323 class YoutubeDLError(Exception):
2324 """Base exception for YoutubeDL errors."""
2328 class ExtractorError(YoutubeDLError
):
2329 """Error during info extraction."""
2331 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
2332 """ tb, if given, is the original traceback (so that it can be printed out).
2333 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
2336 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
2338 if video_id
is not None:
2339 msg
= video_id
+ ': ' + msg
2341 msg
+= ' (caused by %r)' % cause
2343 msg
+= bug_reports_message()
2344 super(ExtractorError
, self
).__init
__(msg
)
2347 self
.exc_info
= sys
.exc_info() # preserve original exception
2349 self
.video_id
= video_id
2351 def format_traceback(self
):
2352 if self
.traceback
is None:
2354 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
2364 class RegexNotFoundError(ExtractorError
):
2365 """Error when a regex didn't match"""
2369 class GeoRestrictedError(ExtractorError
):
2370 """Geographic restriction Error exception.
2372 This exception may be thrown when a video is not available from your
2373 geographic location due to geographic restrictions imposed by a website.
2375 def __init__(self
, msg
, countries
=None):
2376 super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True)
2378 self
.countries
= countries
2381 class DownloadError(YoutubeDLError
):
2382 """Download Error exception.
2384 This exception may be thrown by FileDownloader objects if they are not
2385 configured to continue on errors. They will contain the appropriate
2389 def __init__(self
, msg
, exc_info
=None):
2390 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2391 super(DownloadError
, self
).__init
__(msg
)
2392 self
.exc_info
= exc_info
2395 class SameFileError(YoutubeDLError
):
2396 """Same File exception.
2398 This exception will be thrown by FileDownloader objects if they detect
2399 multiple files would have to be downloaded to the same file on disk.
2404 class PostProcessingError(YoutubeDLError
):
2405 """Post Processing exception.
2407 This exception may be raised by PostProcessor's .run() method to
2408 indicate an error in the postprocessing task.
2411 def __init__(self
, msg
):
2412 super(PostProcessingError
, self
).__init
__(msg
)
2416 class MaxDownloadsReached(YoutubeDLError
):
2417 """ --max-downloads limit has been reached. """
2421 class UnavailableVideoError(YoutubeDLError
):
2422 """Unavailable Format exception.
2424 This exception will be thrown when a video is requested
2425 in a format that is not available for that video.
2430 class ContentTooShortError(YoutubeDLError
):
2431 """Content Too Short exception.
2433 This exception may be raised by FileDownloader objects when a file they
2434 download is too small for what the server announced first, indicating
2435 the connection was probably interrupted.
2438 def __init__(self
, downloaded
, expected
):
2439 super(ContentTooShortError
, self
).__init
__(
2440 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
)
2443 self
.downloaded
= downloaded
2444 self
.expected
= expected
2447 class XAttrMetadataError(YoutubeDLError
):
2448 def __init__(self
, code
=None, msg
='Unknown error'):
2449 super(XAttrMetadataError
, self
).__init
__(msg
)
2453 # Parsing code and msg
2454 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2455 or 'No space left' in self
.msg
or 'Disk quota excedded' in self
.msg
):
2456 self
.reason
= 'NO_SPACE'
2457 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2458 self
.reason
= 'VALUE_TOO_LONG'
2460 self
.reason
= 'NOT_SUPPORTED'
2463 class XAttrUnavailableError(YoutubeDLError
):
2467 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2468 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2469 # expected HTTP responses to meet HTTP/1.0 or later (see also
2470 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2471 if sys
.version_info
< (3, 0):
2472 kwargs
['strict'] = True
2473 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2474 source_address
= ydl_handler
._params
.get('source_address')
2476 if source_address
is not None:
2477 # This is to workaround _create_connection() from socket where it will try all
2478 # address data from getaddrinfo() including IPv6. This filters the result from
2479 # getaddrinfo() based on the source_address value.
2480 # This is based on the cpython socket.create_connection() function.
2481 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2482 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2483 host
, port
= address
2485 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2486 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2487 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2488 if addrs
and not ip_addrs
:
2489 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2491 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2492 % (ip_version
, source_address
[0]))
2493 for res
in ip_addrs
:
2494 af
, socktype
, proto
, canonname
, sa
= res
2497 sock
= socket
.socket(af
, socktype
, proto
)
2498 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2499 sock
.settimeout(timeout
)
2500 sock
.bind(source_address
)
2502 err
= None # Explicitly break reference cycle
2504 except socket
.error
as _
:
2506 if sock
is not None:
2511 raise socket
.error('getaddrinfo returns an empty list')
2512 if hasattr(hc
, '_create_connection'):
2513 hc
._create
_connection
= _create_connection
2514 sa
= (source_address
, 0)
2515 if hasattr(hc
, 'source_address'): # Python 2.7+
2516 hc
.source_address
= sa
2518 def _hc_connect(self
, *args
, **kwargs
):
2519 sock
= _create_connection(
2520 (self
.host
, self
.port
), self
.timeout
, sa
)
2522 self
.sock
= ssl
.wrap_socket(
2523 sock
, self
.key_file
, self
.cert_file
,
2524 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2527 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Resolve youtube-dl internal pseudo-headers into plain HTTP headers.

    When the 'Youtubedl-no-compression' marker is present, a new dict is
    returned with every 'Accept-Encoding' header (case-insensitive) dropped
    and the marker itself removed. Without the marker the input mapping is
    returned unchanged (same object).
    """
    result = headers
    if 'Youtubedl-no-compression' in result:
        kept = [
            (key, value) for key, value in result.items()
            if key.lower() != 'accept-encoding']
        result = dict(kept)
        del result['Youtubedl-no-compression']

    return result
2542 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2543 """Handler for HTTP requests and responses.
2545 This class, when installed with an OpenerDirector, automatically adds
2546 the standard headers to every HTTP request and handles gzipped and
2547 deflated responses from web servers. If compression is to be avoided in
2548 a particular request, the original request in the program code only has
2549 to include the HTTP header "Youtubedl-no-compression", which will be
2550 removed before making the real request.
2552 Part of this code was copied from:
2554 http://techknack.net/python-urllib2-handlers/
2556 Andrew Rowls, the author of that code, agreed to release it to the
2560 def __init__(self
, params
, *args
, **kwargs
):
2561 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
2562 self
._params
= params
2564 def http_open(self
, req
):
2565 conn_class
= compat_http_client
.HTTPConnection
2567 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2569 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2570 del req
.headers
['Ytdl-socks-proxy']
2572 return self
.do_open(functools
.partial(
2573 _create_http_connection
, self
, conn_class
, False),
2579 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2581 return zlib
.decompress(data
)
2583 def http_request(self
, req
):
2584 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2585 # always respected by websites, some tend to give out URLs with non percent-encoded
2586 # non-ASCII characters (see telemb.py, ard.py [#3412])
2587 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2588 # To work around aforementioned issue we will replace request's original URL with
2589 # percent-encoded one
2590 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2591 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2592 url
= req
.get_full_url()
2593 url_escaped
= escape_url(url
)
2595 # Substitute URL if any change after escaping
2596 if url
!= url_escaped
:
2597 req
= update_Request(req
, url
=url_escaped
)
2599 for h
, v
in std_headers
.items():
2600 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2601 # The dict keys are capitalized because of this bug by urllib
2602 if h
.capitalize() not in req
.headers
:
2603 req
.add_header(h
, v
)
2605 req
.headers
= handle_youtubedl_headers(req
.headers
)
2607 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2608 # Python 2.6 is brain-dead when it comes to fragments
2609 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2610 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2614 def http_response(self
, req
, resp
):
2617 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2618 content
= resp
.read()
2619 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2621 uncompressed
= io
.BytesIO(gz
.read())
2622 except IOError as original_ioerror
:
2623 # There may be junk add the end of the file
2624 # See http://stackoverflow.com/q/4928560/35070 for details
2625 for i
in range(1, 1024):
2627 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2628 uncompressed
= io
.BytesIO(gz
.read())
2633 raise original_ioerror
2634 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2635 resp
.msg
= old_resp
.msg
2636 del resp
.headers
['Content-encoding']
2638 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2639 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2640 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2641 resp
.msg
= old_resp
.msg
2642 del resp
.headers
['Content-encoding']
2643 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2644 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2645 if 300 <= resp
.code
< 400:
2646 location
= resp
.headers
.get('Location')
2648 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2649 if sys
.version_info
>= (3, 0):
2650 location
= location
.encode('iso-8859-1').decode('utf-8')
2652 location
= location
.decode('utf-8')
2653 location_escaped
= escape_url(location
)
2654 if location
!= location_escaped
:
2655 del resp
.headers
['Location']
2656 if sys
.version_info
< (3, 0):
2657 location_escaped
= location_escaped
.encode('utf-8')
2658 resp
.headers
['Location'] = location_escaped
2661 https_request
= http_request
2662 https_response
= http_response
2665 def make_socks_conn_class(base_class
, socks_proxy
):
2666 assert issubclass(base_class
, (
2667 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2669 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2670 if url_components
.scheme
.lower() == 'socks5':
2671 socks_type
= ProxyType
.SOCKS5
2672 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2673 socks_type
= ProxyType
.SOCKS4
2674 elif url_components
.scheme
.lower() == 'socks4a':
2675 socks_type
= ProxyType
.SOCKS4A
2677 def unquote_if_non_empty(s
):
2680 return compat_urllib_parse_unquote_plus(s
)
2684 url_components
.hostname
, url_components
.port
or 1080,
2686 unquote_if_non_empty(url_components
.username
),
2687 unquote_if_non_empty(url_components
.password
),
2690 class SocksConnection(base_class
):
2692 self
.sock
= sockssocket()
2693 self
.sock
.setproxy(*proxy_args
)
2694 if type(self
.timeout
) in (int, float):
2695 self
.sock
.settimeout(self
.timeout
)
2696 self
.sock
.connect((self
.host
, self
.port
))
2698 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2699 if hasattr(self
, '_context'): # Python > 2.6
2700 self
.sock
= self
._context
.wrap_socket(
2701 self
.sock
, server_hostname
=self
.host
)
2703 self
.sock
= ssl
.wrap_socket(self
.sock
)
2705 return SocksConnection
2708 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2709 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2710 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2711 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2712 self
._params
= params
2714 def https_open(self
, req
):
2716 conn_class
= self
._https
_conn
_class
2718 if hasattr(self
, '_context'): # python > 2.6
2719 kwargs
['context'] = self
._context
2720 if hasattr(self
, '_check_hostname'): # python 3.x
2721 kwargs
['check_hostname'] = self
._check
_hostname
2723 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2725 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2726 del req
.headers
['Ytdl-socks-proxy']
2728 return self
.do_open(functools
.partial(
2729 _create_http_connection
, self
, conn_class
, True),
2733 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2735 See [1] for cookie file format.
2737 1. https://curl.haxx.se/docs/http-cookies.html
2739 _HTTPONLY_PREFIX
= '#HttpOnly_'
2741 _HEADER
= '''# Netscape HTTP Cookie File
2742 # This file is generated by youtube-dl. Do not edit.
2745 _CookieFileEntry
= collections
.namedtuple(
2747 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2749 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2751 Save cookies to a file.
2753 Most of the code is taken from CPython 3.8 and slightly adapted
2754 to support cookie files with UTF-8 in both python 2 and 3.
2756 if filename
is None:
2757 if self
.filename
is not None:
2758 filename
= self
.filename
2760 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2762 # Store session cookies with `expires` set to 0 instead of an empty
2765 if cookie
.expires
is None:
2768 with io
.open(filename
, 'w', encoding
='utf-8') as f
:
2769 f
.write(self
._HEADER
)
2772 if not ignore_discard
and cookie
.discard
:
2774 if not ignore_expires
and cookie
.is_expired(now
):
2780 if cookie
.domain
.startswith('.'):
2781 initial_dot
= 'TRUE'
2783 initial_dot
= 'FALSE'
2784 if cookie
.expires
is not None:
2785 expires
= compat_str(cookie
.expires
)
2788 if cookie
.value
is None:
2789 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2790 # with no name, whereas http.cookiejar regards it as a
2791 # cookie with no value.
2796 value
= cookie
.value
2798 '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
,
2799 secure
, expires
, name
, value
]) + '\n')
2801 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2802 """Load cookies from a file."""
2803 if filename
is None:
2804 if self
.filename
is not None:
2805 filename
= self
.filename
2807 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2809 def prepare_line(line
):
2810 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2811 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2812 # comments and empty lines are fine
2813 if line
.startswith('#') or not line
.strip():
2815 cookie_list
= line
.split('\t')
2816 if len(cookie_list
) != self
._ENTRY
_LEN
:
2817 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
2818 cookie
= self
._CookieFileEntry
(*cookie_list
)
2819 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
2820 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
2824 with io
.open(filename
, encoding
='utf-8') as f
:
2827 cf
.write(prepare_line(line
))
2828 except compat_cookiejar
.LoadError
as e
:
2830 'WARNING: skipping cookie file entry due to %s: %r\n'
2831 % (e
, line
), sys
.stderr
)
2834 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2835 # Session cookies are denoted by either `expires` field set to
2836 # an empty string or 0. MozillaCookieJar only recognizes the former
2837 # (see [1]). So we need force the latter to be recognized as session
2838 # cookies on our own.
2839 # Session cookies may be important for cookies-based authentication,
2840 # e.g. usually, when user does not check 'Remember me' check box while
2841 # logging in on a site, some important cookies are stored as session
2842 # cookies so that not recognizing them will result in failed login.
2843 # 1. https://bugs.python.org/issue17164
2845 # Treat `expires=0` cookies as session cookies
2846 if cookie
.expires
== 0:
2847 cookie
.expires
= None
2848 cookie
.discard
= True
2851 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
):
2852 def __init__(self
, cookiejar
=None):
2853 compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
2855 def http_response(self
, request
, response
):
2856 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2857 # characters in Set-Cookie HTTP header of last response (see
2858 # https://github.com/ytdl-org/youtube-dl/issues/6769).
2859 # In order to at least prevent crashing we will percent encode Set-Cookie
2860 # header before HTTPCookieProcessor starts processing it.
2861 # if sys.version_info < (3, 0) and response.headers:
2862 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2863 # set_cookie = response.headers.get(set_cookie_header)
2865 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2866 # if set_cookie != set_cookie_escaped:
2867 # del response.headers[set_cookie_header]
2868 # response.headers[set_cookie_header] = set_cookie_escaped
2869 return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
)
2871 https_request
= compat_urllib_request
.HTTPCookieProcessor
.http_request
2872 https_response
= http_response
2875 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
):
2876 if sys
.version_info
[0] < 3:
2877 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
2878 # On python 2 urlh.geturl() may sometimes return redirect URL
2879 # as byte string instead of unicode. This workaround allows
2880 # to force it always return unicode.
2881 return compat_urllib_request
.HTTPRedirectHandler
.redirect_request(self
, req
, fp
, code
, msg
, headers
, compat_str(newurl
))
def extract_timezone(date_str):
    """Split a trailing UTC offset (or literal 'Z') off *date_str*.

    Returns a (datetime.timedelta, remaining_date_str) tuple; the delta is
    zero when no explicit numeric offset is present.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        # No recognizable timezone suffix at all.
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # Bare 'Z' suffix: UTC, zero offset.
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # strptime cannot digest fractional seconds here; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        normalized = datetime.datetime.strptime(date_str, fmt) - timezone
        return calendar.timegm(normalized.timetuple())
    except ValueError:
        # Unparsable date -> None (implicit in the original as well).
        return None
2921 def date_formats(day_first
=True):
2922 return DATE_FORMATS_DAY_FIRST
if day_first
else DATE_FORMATS_MONTH_FIRST
2925 def unified_strdate(date_str
, day_first
=True):
2926 """Return a string with the date in the format YYYYMMDD"""
2928 if date_str
is None:
2932 date_str
= date_str
.replace(',', ' ')
2933 # Remove AM/PM + timezone
2934 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
2935 _
, date_str
= extract_timezone(date_str
)
2937 for expression
in date_formats(day_first
):
2939 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
2942 if upload_date
is None:
2943 timetuple
= email
.utils
.parsedate_tz(date_str
)
2946 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
2949 if upload_date
is not None:
2950 return compat_str(upload_date
)
2953 def unified_timestamp(date_str
, day_first
=True):
2954 if date_str
is None:
2957 date_str
= re
.sub(r
'[,|]', '', date_str
)
2959 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
2960 timezone
, date_str
= extract_timezone(date_str
)
2962 # Remove AM/PM + timezone
2963 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
2965 # Remove unrecognized timezones from ISO 8601 alike timestamps
2966 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
2968 date_str
= date_str
[:-len(m
.group('tz'))]
2970 # Python only supports microseconds, so remove nanoseconds
2971 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
2973 date_str
= m
.group(1)
2975 for expression
in date_formats(day_first
):
2977 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
2978 return calendar
.timegm(dt
.timetuple())
2981 timetuple
= email
.utils
.parsedate_tz(date_str
)
2983 return calendar
.timegm(timetuple
) + pm_delta
* 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
2999 def subtitles_filename(filename
, sub_lang
, sub_format
, expected_real_ext
=None):
3000 return replace_extension(filename
, sub_lang
+ '.' + sub_format
, expected_real_ext
)
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    m = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if m is not None:
        amount = int(m.group('time'))
        if m.group('sign') == '-':
            amount = -amount
        unit = m.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        unit += 's'
        return today + datetime.timedelta(**{unit: amount})
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        # Not a plain YYYYMMDD string -- hand it back untouched.
        return date_str
    return '-'.join(m.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Unbounded ends default to the extreme representable dates.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3073 def platform_name():
3074 """ Returns the platform name as a compat_str """
3075 res
= platform
.platform()
3076 if isinstance(res
, bytes):
3077 res
= res
.decode(preferredencoding())
3079 assert isinstance(res
, compat_str
)
3083 def _windows_write_string(s
, out
):
3084 """ Returns True if the string was written using special methods,
3085 False if it has yet to be written out."""
3086 # Adapted from http://stackoverflow.com/a/3259271/35070
3089 import ctypes
.wintypes
3097 fileno
= out
.fileno()
3098 except AttributeError:
3099 # If the output stream doesn't have a fileno, it's virtual
3101 except io
.UnsupportedOperation
:
3102 # Some strange Windows pseudo files?
3104 if fileno
not in WIN_OUTPUT_IDS
:
3107 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3108 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3109 ('GetStdHandle', ctypes
.windll
.kernel32
))
3110 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3112 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3113 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3114 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3115 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3116 written
= ctypes
.wintypes
.DWORD(0)
3118 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3119 FILE_TYPE_CHAR
= 0x0002
3120 FILE_TYPE_REMOTE
= 0x8000
3121 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3122 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3123 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3124 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3125 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3127 def not_a_console(handle
):
3128 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3130 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3131 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3133 if not_a_console(h
):
3136 def next_nonbmp_pos(s
):
3138 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3139 except StopIteration:
3143 count
= min(next_nonbmp_pos(s
), 1024)
3145 ret
= WriteConsoleW(
3146 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3148 raise OSError('Failed to write string')
3149 if not count
: # We just wrote a non-BMP character
3150 assert written
.value
== 2
3153 assert written
.value
> 0
3154 s
= s
[written
.value
:]
3158 def write_string(s
, out
=None, encoding
=None):
3161 assert type(s
) == compat_str
3163 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3164 if _windows_write_string(s
, out
):
3167 if ('b' in getattr(out
, 'mode', '')
3168 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3169 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3171 elif hasattr(out
, 'buffer'):
3172 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3173 byt
= s
.encode(enc
, 'ignore')
3174 out
.buffer.write(byt
)
def bytes_to_intlist(bs):
    """Return the byte values of *bs* as a list of ints (Python 2/3 safe)."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Python 2: indexing a byte string yields 1-char strings.
    return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a sequence of ints (0-255) back into a byte string."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3195 # Cross-platform file locking
3196 if sys
.platform
== 'win32':
3197 import ctypes
.wintypes
3200 class OVERLAPPED(ctypes
.Structure
):
3202 ('Internal', ctypes
.wintypes
.LPVOID
),
3203 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3204 ('Offset', ctypes
.wintypes
.DWORD
),
3205 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3206 ('hEvent', ctypes
.wintypes
.HANDLE
),
3209 kernel32
= ctypes
.windll
.kernel32
3210 LockFileEx
= kernel32
.LockFileEx
3211 LockFileEx
.argtypes
= [
3212 ctypes
.wintypes
.HANDLE
, # hFile
3213 ctypes
.wintypes
.DWORD
, # dwFlags
3214 ctypes
.wintypes
.DWORD
, # dwReserved
3215 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3216 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3217 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3219 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3220 UnlockFileEx
= kernel32
.UnlockFileEx
3221 UnlockFileEx
.argtypes
= [
3222 ctypes
.wintypes
.HANDLE
, # hFile
3223 ctypes
.wintypes
.DWORD
, # dwReserved
3224 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3225 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3226 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3228 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3229 whole_low
= 0xffffffff
3230 whole_high
= 0x7fffffff
3232 def _lock_file(f
, exclusive
):
3233 overlapped
= OVERLAPPED()
3234 overlapped
.Offset
= 0
3235 overlapped
.OffsetHigh
= 0
3236 overlapped
.hEvent
= 0
3237 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3238 handle
= msvcrt
.get_osfhandle(f
.fileno())
3239 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3240 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3241 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3243 def _unlock_file(f
):
3244 assert f
._lock
_file
_overlapped
_p
3245 handle
= msvcrt
.get_osfhandle(f
.fileno())
3246 if not UnlockFileEx(handle
, 0,
3247 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3248 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3251 # Some platforms, such as Jython, is missing fcntl
3255 def _lock_file(f
, exclusive
):
3256 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3258 def _unlock_file(f
):
3259 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3261 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3263 def _lock_file(f
, exclusive
):
3264 raise IOError(UNSUPPORTED_MSG
)
3266 def _unlock_file(f
):
3267 raise IOError(UNSUPPORTED_MSG
)
3270 class locked_file(object):
3271 def __init__(self
, filename
, mode
, encoding
=None):
3272 assert mode
in ['r', 'a', 'w']
3273 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3276 def __enter__(self
):
3277 exclusive
= self
.mode
!= 'r'
3279 _lock_file(self
.f
, exclusive
)
3285 def __exit__(self
, etype
, value
, traceback
):
3287 _unlock_file(self
.f
)
3294 def write(self
, *args
):
3295 return self
.f
.write(*args
)
3297 def read(self
, *args
):
3298 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' when unset."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
3306 def shell_quote(args
):
3308 encoding
= get_filesystem_encoding()
3310 if isinstance(a
, bytes):
3311 # We may get a filename encoded with 'encodeFilename'
3312 a
= a
.decode(encoding
)
3313 quoted_args
.append(compat_shlex_quote(a
))
3314 return ' '.join(quoted_args
)
3317 def smuggle_url(url
, data
):
3318 """ Pass additional data in a URL for internal use. """
3320 url
, idata
= unsmuggle_url(url
, {})
3322 sdata
= compat_urllib_parse_urlencode(
3323 {'__youtubedl_smuggle': json
.dumps(data
)})
3324 return url
+ '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): split embedded data off the URL fragment.

    Returns (url, data); *default* is used when nothing was smuggled.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
def format_bytes(bytes):
    """Render a byte count as a human-readable string, e.g. '1.50KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log(0) is undefined; zero bytes stay in the plain-'B' bucket.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    return '%.2f%s' % (float(bytes) / float(1024 ** exponent), suffix)
def lookup_unit_table(unit_table, s):
    """Parse a '<number><unit>' string via *unit_table* ({unit: multiplier}).

    Returns the value in base units as an int, or None when *s* does not
    start with a recognizable number/unit pair.
    """
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    # Accept ',' as a decimal separator as well.
    num = float(m.group('num').replace(',', '.'))
    return int(num * unit_table[m.group('unit')])
3361 def parse_filesize(s
):
3365 # The lower-case forms are of course incorrect and unofficial,
3366 # but we support those too
3383 'megabytes': 1000 ** 2,
3384 'mebibytes': 1024 ** 2,
3390 'gigabytes': 1000 ** 3,
3391 'gibibytes': 1024 ** 3,
3397 'terabytes': 1000 ** 4,
3398 'tebibytes': 1024 ** 4,
3404 'petabytes': 1000 ** 5,
3405 'pebibytes': 1024 ** 5,
3411 'exabytes': 1000 ** 6,
3412 'exbibytes': 1024 ** 6,
3418 'zettabytes': 1000 ** 7,
3419 'zebibytes': 1024 ** 7,
3425 'yottabytes': 1000 ** 8,
3426 'yobibytes': 1024 ** 8,
3429 return lookup_unit_table(_UNIT_TABLE
, s
)
3438 if re
.match(r
'^[\d,.]+$', s
):
3439 return str_to_int(s
)
3450 return lookup_unit_table(_UNIT_TABLE
, s
)
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xXĆ]\s*(?P<h>\d+)\b', s)
    if m:
        return {
            'width': int(m.group('w')),
            'height': int(m.group('h')),
        }

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # '4k' -> 2160, '8k' -> 4320 (multiples of 540).
        return {'height': int(m.group(1)) * 540}

    return {}
3475 def parse_bitrate(s
):
3476 if not isinstance(s
, compat_str
):
3478 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3480 return int(mobj
.group(1))
3483 def month_by_name(name
, lang
='en'):
3484 """ Return the number of a month by (locale-independently) English name """
3486 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
3489 return month_names
.index(name
) + 1
3494 def month_by_abbreviation(abbrev
):
3495 """ Return the number of a month by (locale-independently) English
3499 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in *xml_str* with '&amp;' (existing entities untouched)."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
3512 def setproctitle(title
):
3513 assert isinstance(title
, compat_str
)
3515 # ctypes in Jython is not complete
3516 # http://bugs.jython.org/issue2148
3517 if sys
.platform
.startswith('java'):
3521 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3525 # LoadLibrary in Windows Python 2.7.13 only expects
3526 # a bytestring, but since unicode_literals turns
3527 # every string into a unicode string, it fails.
3529 title_bytes
= title
.encode('utf-8')
3530 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3531 buf
.value
= title_bytes
3533 libc
.prctl(15, buf
, 0, 0, 0)
3534 except AttributeError:
3535 return # Strange libc, just skip this
def remove_start(s, start):
    """Strip *start* from the beginning of *s*; no-op when absent or s is None."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Strip *end* from the end of *s*; no-op when absent or s is None.

    Fix: the original `s[:-len(end)]` with an empty *end* evaluated to
    `s[:0]` and wiped the whole string (since `s.endswith('')` is always
    True); an empty suffix is now an explicit no-op.
    """
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for q in ('"', "'", ):
        if s[0] == q and s[-1] == q:
            return s[1:-1]
    return s
3555 def url_basename(url
):
3556 path
= compat_urlparse
.urlparse(url
).path
3557 return path
.strip('/').split('/')[-1]
3561 return re
.match(r
'https?://[^?#&]+/', url
).group()
3564 def urljoin(base
, path
):
3565 if isinstance(path
, bytes):
3566 path
= path
.decode('utf-8')
3567 if not isinstance(path
, compat_str
) or not path
:
3569 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
3571 if isinstance(base
, bytes):
3572 base
= base
.decode('utf-8')
3573 if not isinstance(base
, compat_str
) or not re
.match(
3574 r
'^(?:https?:)?//', base
):
3576 return compat_urlparse
.urljoin(base
, path
)
3579 class HEADRequest(compat_urllib_request
.Request
):
3580 def get_method(self
):
3584 class PUTRequest(compat_urllib_request
.Request
):
3585 def get_method(self
):
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to an int (optionally read attribute *get_attr* first,
    then multiply by *invscale* and floor-divide by *scale*); *default*
    when the value is missing or not numeric."""
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Stringify *v* unless it is None, in which case return *default*."""
    if v is None:
        return default
    return compat_str(v)
3607 def str_to_int(int_str
):
3608 """ A more relaxed version of int_or_none """
3609 if isinstance(int_str
, compat_integer_types
):
3611 elif isinstance(int_str
, compat_str
):
3612 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
3613 return int_or_none(int_str
)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float with optional re-scaling; *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
3629 def strip_or_none(v
, default
=None):
3630 return v
.strip() if isinstance(v
, compat_str
) else default
def url_or_none(url):
    """Return *url* only when it looks like an absolute or protocol-relative URL."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
3640 def parse_duration(s
):
3641 if not isinstance(s
, compat_basestring
):
3646 days
, hours
, mins
, secs
, ms
= [None] * 5
3647 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3649 days
, hours
, mins
, secs
, ms
= m
.groups()
3654 [0-9]+\s*y(?:ears?)?\s*
3657 [0-9]+\s*m(?:onths?)?\s*
3660 [0-9]+\s*w(?:eeks?)?\s*
3663 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3667 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3670 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3673 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3676 days
, hours
, mins
, secs
, ms
= m
.groups()
3678 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3680 hours
, mins
= m
.groups()
3686 duration
+= float(secs
)
3688 duration
+= float(mins
) * 60
3690 duration
+= float(hours
) * 60 * 60
3692 duration
+= float(days
) * 24 * 60 * 60
3694 duration
+= float(ms
)
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the file's real extension.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the extension of *filename* for *ext*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the full filename instead of replacing.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        base = name
    else:
        base = filename
    return '{0}.{1}'.format(base, ext)
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)

    Fix: the original used a mutable default argument (args=[]); the
    default is now None and normalized inside (backward compatible).
    """
    try:
        subprocess.Popen(
            [exe] + (args or []),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        # Binary not found / not runnable.
        return False
    return exe
3723 def get_exe_version(exe
, args
=['--version'],
3724 version_re
=None, unrecognized
='present'):
3725 """ Returns the version of the specified executable,
3726 or False if the executable is not present """
3728 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3729 # SIGTTOU if youtube-dl is run in the background.
3730 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3731 out
, _
= subprocess
.Popen(
3732 [encodeArgument(exe
)] + args
,
3733 stdin
=subprocess
.PIPE
,
3734 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate()
3737 if isinstance(out
, bytes): # Python 2.x
3738 out
= out
.decode('ascii', 'ignore')
3739 return detect_exe_version(out
, version_re
, unrecognized
)
3742 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3743 assert isinstance(output
, compat_str
)
3744 if version_re
is None:
3745 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3746 m
= re
.search(version_re
, output
)
3753 class PagedList(object):
3755 # This is only useful for tests
3756 return len(self
.getslice())
3759 class OnDemandPagedList(PagedList
):
3760 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
3761 self
._pagefunc
= pagefunc
3762 self
._pagesize
= pagesize
3763 self
._use
_cache
= use_cache
3767 def getslice(self
, start
=0, end
=None):
3769 for pagenum
in itertools
.count(start
// self
._pagesize
):
3770 firstid
= pagenum
* self
._pagesize
3771 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
3772 if start
>= nextfirstid
:
3777 page_results
= self
._cache
.get(pagenum
)
3778 if page_results
is None:
3779 page_results
= list(self
._pagefunc
(pagenum
))
3781 self
._cache
[pagenum
] = page_results
3784 start
% self
._pagesize
3785 if firstid
<= start
< nextfirstid
3789 ((end
- 1) % self
._pagesize
) + 1
3790 if (end
is not None and firstid
<= end
<= nextfirstid
)
3793 if startv
!= 0 or endv
is not None:
3794 page_results
= page_results
[startv
:endv
]
3795 res
.extend(page_results
)
3797 # A little optimization - if current page is not "full", ie. does
3798 # not contain page_size videos then we can assume that this page
3799 # is the last one - there are no more ids on further pages -
3800 # i.e. no need to query again.
3801 if len(page_results
) + startv
< self
._pagesize
:
3804 # If we got the whole page, but the next page is not interesting,
3805 # break out early as well
3806 if end
== nextfirstid
:
3811 class InAdvancePagedList(PagedList
):
3812 def __init__(self
, pagefunc
, pagecount
, pagesize
):
3813 self
._pagefunc
= pagefunc
3814 self
._pagecount
= pagecount
3815 self
._pagesize
= pagesize
3817 def getslice(self
, start
=0, end
=None):
3819 start_page
= start
// self
._pagesize
3821 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
3822 skip_elems
= start
- start_page
* self
._pagesize
3823 only_more
= None if end
is None else end
- start
3824 for pagenum
in range(start_page
, end_page
):
3825 page
= list(self
._pagefunc
(pagenum
))
3827 page
= page
[skip_elems
:]
3829 if only_more
is not None:
3830 if len(page
) < only_more
:
3831 only_more
-= len(page
)
3833 page
= page
[:only_more
]
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
3856 def escape_rfc3986(s
):
3857 """Escape non-ASCII characters as suggested by RFC 3986"""
3858 if sys
.version_info
< (3, 0) and isinstance(s
, compat_str
):
3859 s
= s
.encode('utf-8')
3860 return compat_urllib_parse
.quote(s
, b
"%/;:@&=+$,!~*'()?#[]")
3863 def escape_url(url
):
3864 """Escape URL as suggested by RFC 3986"""
3865 url_parsed
= compat_urllib_parse_urlparse(url
)
3866 return url_parsed
._replace
(
3867 netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'),
3868 path
=escape_rfc3986(url_parsed
.path
),
3869 params
=escape_rfc3986(url_parsed
.params
),
3870 query
=escape_rfc3986(url_parsed
.query
),
3871 fragment
=escape_rfc3986(url_parsed
.fragment
)
3875 def read_batch_urls(batch_fd
):
3877 if not isinstance(url
, compat_str
):
3878 url
= url
.decode('utf-8', 'replace')
3879 BOM_UTF8
= '\xef\xbb\xbf'
3880 if url
.startswith(BOM_UTF8
):
3881 url
= url
[len(BOM_UTF8
):]
3883 if url
.startswith(('#', ';', ']')):
3887 with contextlib
.closing(batch_fd
) as fd
:
3888 return [url
for url
in map(fixup
, fd
) if url
]
3891 def urlencode_postdata(*args
, **kargs
):
3892 return compat_urllib_parse_urlencode(*args
, **kargs
).encode('ascii')
3895 def update_url_query(url
, query
):
3898 parsed_url
= compat_urlparse
.urlparse(url
)
3899 qs
= compat_parse_qs(parsed_url
.query
)
3901 return compat_urlparse
.urlunparse(parsed_url
._replace
(
3902 query
=compat_urllib_parse_urlencode(qs
, True)))
3905 def update_Request(req
, url
=None, data
=None, headers
={}, query
={}):
3906 req_headers
= req
.headers
.copy()
3907 req_headers
.update(headers
)
3908 req_data
= data
or req
.data
3909 req_url
= update_url_query(url
or req
.get_full_url(), query
)
3910 req_get_method
= req
.get_method()
3911 if req_get_method
== 'HEAD':
3912 req_type
= HEADRequest
3913 elif req_get_method
== 'PUT':
3914 req_type
= PUTRequest
3916 req_type
= compat_urllib_request
.Request
3918 req_url
, data
=req_data
, headers
=req_headers
,
3919 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
3920 if hasattr(req
, 'timeout'):
3921 new_req
.timeout
= req
.timeout
3925 def _multipart_encode_impl(data
, boundary
):
3926 content_type
= 'multipart/form-data; boundary=%s' % boundary
3929 for k
, v
in data
.items():
3930 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
3931 if isinstance(k
, compat_str
):
3932 k
= k
.encode('utf-8')
3933 if isinstance(v
, compat_str
):
3934 v
= v
.encode('utf-8')
3935 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3936 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3937 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
3938 if boundary
.encode('ascii') in content
:
3939 raise ValueError('Boundary overlaps with data')
3942 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
3944 return out
, content_type
3947 def multipart_encode(data
, boundary
=None):
3949 Encode a dict to RFC 7578-compliant form-data
3952 A dict where keys and values can be either Unicode or bytes-like
3955 If specified a Unicode object, it's used as the boundary. Otherwise
3956 a random boundary is generated.
3958 Reference: https://tools.ietf.org/html/rfc7578
3960 has_specified_boundary
= boundary
is not None
3963 if boundary
is None:
3964 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
3967 out
, content_type
= _multipart_encode_impl(data
, boundary
)
3970 if has_specified_boundary
:
3974 return out
, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Fetch the first usable value for one key or a sequence of candidate keys.

    With skip_false_values (the default), falsy values ('', 0, [], ...) are
    skipped as well as missing/None entries.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None or skip_false_values and not d[key]:
            continue
        return d[key]
    return default
def try_get(src, getter, expected_type=None):
    """Apply one or several getter callables to *src*, swallowing the usual
    lookup errors; optionally require the result to be of *expected_type*."""
    if not isinstance(getter, (list, tuple)):
        getter = [getter]
    for get in getter:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4000 def merge_dicts(*dicts
):
4002 for a_dict
in dicts
:
4003 for k
, v
in a_dict
.items():
4007 or (isinstance(v
, compat_str
) and v
4008 and isinstance(merged
[k
], compat_str
)
4009 and not merged
[k
])):
4014 def encode_compat_str(string
, encoding
=preferredencoding(), errors
='strict'):
4015 return string
if isinstance(string
, compat_str
) else compat_str(string
, encoding
, errors
)
4027 TV_PARENTAL_GUIDELINES
= {
4037 def parse_age_limit(s
):
4039 return s
if 0 <= s
<= 21 else None
4040 if not isinstance(s
, compat_basestring
):
4042 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
4044 return int(m
.group('age'))
4046 return US_RATINGS
[s
]
4047 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
4049 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
4053 def strip_jsonp(code
):
4056 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4057 (?:\s*&&\s*(?P=func_name))?
4058 \s*\(\s*(?P<callback_data>.*)\);?
4059 \s*?(?://[^\n]*)*$''',
4060 r
'\g<callback_data>', code
)
4063 def js_to_json(code
):
4064 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4065 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
4067 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
4068 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
4073 if v
in ('true', 'false', 'null'):
4075 elif v
.startswith('/*') or v
.startswith('//') or v
== ',':
4078 if v
[0] in ("'", '"'):
4079 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
4084 }.get(m
.group(0), m
.group(0)), v
[1:-1])
4086 for regex
, base
in INTEGER_TABLE
:
4087 im
= re
.match(regex
, v
)
4089 i
= int(im
.group(1), base
)
4090 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4094 return re
.sub(r
'''(?sx)
4095 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4096 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4097 {comment}|,(?={skip}[\]}}])|
4098 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4099 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4101 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
4104 def qualities(quality_ids
):
4105 """ Get a numeric quality value out of a list of possible values """
4108 return quality_ids
.index(qid
)
4114 DEFAULT_OUTTMPL
= '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ellipses = '...'
    return s if len(s) <= length else s[:length - len(ellipses)] + ellipses
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(int(piece) for piece in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; an empty/unparsable *version*
    yields `not assume_new` instead of raising."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    # Updateable when running from a zip bundle or a frozen executable
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Return the text form of an exception, safe on both Python 2 and 3."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4161 def mimetype2ext(mt
):
4167 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4168 # it's the most popular one
4169 'audio/mpeg': 'mp3',
4174 _
, _
, res
= mt
.rpartition('/')
4175 res
= res
.split(';')[0].strip().lower()
4179 'smptett+xml': 'tt',
4183 'x-mp4-fragmented': 'mp4',
4184 'x-ms-sami': 'sami',
4187 'x-mpegurl': 'm3u8',
4188 'vnd.apple.mpegurl': 'm3u8',
4192 'vnd.ms-sstr+xml': 'ism',
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    See http://tools.ietf.org/html/rfc6381. Returns {} when nothing can
    be determined from *codecs_str*.
    """
    if not codecs_str:
        return {}
    # NOTE: the previous filter/map chain shadowed the builtin `str`
    # inside its lambda; a comprehension avoids that.
    split_codecs = [
        c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # nothing recognized: a bare "video, audio" pair is taken on faith
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a response, preferring the
    Content-Disposition filename over the Content-Type mimetype."""
    header = url_handle.headers.get

    disposition = header('Content-Disposition')
    if disposition:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = m and determine_ext(m.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(header('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI embedding *data* as base64."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer age limit, or no restriction on the content: allow
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # (BOM, encoding) pairs; the UTF-32 BOMs are tested before their
    # UTF-16 prefixes so they are not misidentified
    boms = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    text = None
    for bom, encoding in boms:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
def determine_protocol(info_dict):
    """Work out the download protocol for an info dict."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    if ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # widest cell per column decides that column's width
    widths = [max(len(compat_str(cell)) for cell in column)
              for column in zip(*rows)]
    # left-justify every column but the last, one space between columns
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
def _match_one(filter_part, dct):
    """Evaluate one filter expression (e.g. "duration > 60",
    "uploader = 'x'", "!is_live") against the dict *dct*."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key <op>[?] value — value may be a (filesize-suffixed) number, a
    # quoted string, or a bare word
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # un-escape the quote character inside quoted values
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # not a plain int: try it as a filesize ("500k", "1.2MiB")
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # "op?" means a missing field passes the filter
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
def match_filter_func(filter_str):
    """Build a --match-filter callable: returns None when the info dict
    passes *filter_str*, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds, or None."""
    if not time_expr:
        return None

    # plain offset, optionally suffixed with "s"
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # clock format HH:MM:SS(.fff) — a ":fff" frame part is read as ".fff"
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a second count as an SRT timecode (HH:MM:SS,mmm)."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4411 def dfxp2srt(dfxp_data):
4413 @param dfxp_data A
bytes-like
object containing DFXP data
4414 @returns A
unicode object containing converted SRT data
4416 LEGACY_NAMESPACES = (
4417 (b'http://www.w3.org/ns/ttml', [
4418 b'http://www.w3.org/2004/11/ttaf1',
4419 b'http://www.w3.org/2006/04/ttaf1',
4420 b'http://www.w3.org/2006/10/ttaf1',
4422 (b'http://www.w3.org/ns/ttml#styling', [
4423 b'http://www.w3.org/ns/ttml#style',
4427 SUPPORTED_STYLING = [
4436 _x = functools.partial(xpath_with_ns, ns_map={
4437 'xml': 'http://www.w3.org/XML/1998/namespace',
4438 'ttml': 'http://www.w3.org/ns/ttml',
4439 'tts': 'http://www.w3.org/ns/ttml#styling',
4445 class TTMLPElementParser(object):
4447 _unclosed_elements = []
4448 _applied_styles = []
4450 def start(self, tag, attrib):
4451 if tag in (_x('ttml:br'), 'br'):
4454 unclosed_elements = []
4456 element_style_id = attrib.get('style')
4458 style.update(default_style)
4459 if element_style_id:
4460 style.update(styles.get(element_style_id, {}))
4461 for prop in SUPPORTED_STYLING:
4462 prop_val = attrib.get(_x('tts:' + prop))
4464 style[prop] = prop_val
4467 for k, v in sorted(style.items()):
4468 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4471 font += ' color="%s"' % v
4472 elif k == 'fontSize':
4473 font += ' size="%s"' % v
4474 elif k == 'fontFamily':
4475 font += ' face="%s"' % v
4476 elif k == 'fontWeight' and v == 'bold':
4478 unclosed_elements.append('b')
4479 elif k == 'fontStyle' and v == 'italic':
4481 unclosed_elements.append('i')
4482 elif k == 'textDecoration' and v == 'underline':
4484 unclosed_elements.append('u')
4486 self._out += '<font' + font + '>'
4487 unclosed_elements.append('font')
4489 if self._applied_styles:
4490 applied_style.update(self._applied_styles[-1])
4491 applied_style.update(style)
4492 self._applied_styles.append(applied_style)
4493 self._unclosed_elements.append(unclosed_elements)
4496 if tag not in (_x('ttml:br'), 'br'):
4497 unclosed_elements = self._unclosed_elements.pop()
4498 for element in reversed(unclosed_elements):
4499 self._out += '</%s>' % element
4500 if unclosed_elements and self._applied_styles:
4501 self._applied_styles.pop()
4503 def data(self, data):
4507 return self._out.strip()
4509 def parse_node(node):
4510 target = TTMLPElementParser()
4511 parser = xml.etree.ElementTree.XMLParser(target=target)
4512 parser.feed(xml.etree.ElementTree.tostring(node))
4513 return parser.close()
4515 for k, v in LEGACY_NAMESPACES:
4517 dfxp_data = dfxp_data.replace(ns, k)
4519 dfxp = compat_etree_fromstring(dfxp_data)
4521 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4524 raise ValueError('Invalid dfxp/TTML subtitle')
4528 for style in dfxp.findall(_x('.//ttml:style')):
4529 style_id = style.get('id') or style.get(_x('xml:id'))
4532 parent_style_id = style.get('style')
4534 if parent_style_id not in styles:
4537 styles[style_id] = styles[parent_style_id].copy()
4538 for prop in SUPPORTED_STYLING:
4539 prop_val = style.get(_x('tts:' + prop))
4541 styles.setdefault(style_id, {})[prop] = prop_val
4547 for p in ('body', 'div'):
4548 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4551 style = styles.get(ele.get('style'))
4554 default_style.update(style)
4556 for para, index in zip(paras, itertools.count(1)):
4557 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4558 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4559 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4560 if begin_time is None:
4565 end_time = begin_time + dur
4566 out.append('%d\n%s --> %s\n%s\n\n' % (
4568 srt_subtitles_timecode(begin_time),
4569 srt_subtitles_timecode(end_time),
def cli_option(params, command_option, param):
    """Map an option from *params* onto a [flag, value] argv fragment."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean option onto argv.

    With *separator* a single "flag<sep>value" token is emitted,
    otherwise flag and value as two tokens. A missing param yields [].
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when params[param] equals *expected_value*."""
    matches = params.get(param) == expected_value
    return [command_option] if matches else []
def cli_configuration_args(params, param, default=[]):
    """Fetch an extra-args list from *params*, or *default* when unset.

    NOTE(review): the mutable [] default is shared between calls —
    callers must not mutate the returned default; confirm before changing.
    """
    extra = params.get(param)
    if extra is None:
        return default
    assert isinstance(extra, list)
    return extra
4605 class ISO639Utils(object):
4606 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4665 'iw': 'heb', # Replaced by he in 1989 revision
4675 'in': 'ind', # Replaced by id in 1989 revision
4790 'ji': 'yid', # Replaced by yi in 1989 revision
4798 def short2long(cls, code):
4799 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4800 return cls._lang_map.get(code[:2])
4803 def long2short(cls, code):
4804 """Convert language code from ISO 639-2/T to ISO 639-1"""
4805 for short_name, long_name in cls._lang_map.items():
4806 if long_name == code:
4810 class ISO3166Utils(object):
4811 # From http://data.okfn.org/data/core/country-list
4813 'AF': 'Afghanistan',
4814 'AX': 'Ć
land Islands',
4817 'AS': 'American Samoa',
4822 'AG': 'Antigua and Barbuda',
4839 'BO': 'Bolivia, Plurinational State of',
4840 'BQ': 'Bonaire, Sint Eustatius and Saba',
4841 'BA': 'Bosnia and Herzegovina',
4843 'BV': 'Bouvet Island',
4845 'IO': 'British Indian Ocean Territory',
4846 'BN': 'Brunei Darussalam',
4848 'BF': 'Burkina Faso',
4854 'KY': 'Cayman Islands',
4855 'CF': 'Central African Republic',
4859 'CX': 'Christmas Island',
4860 'CC': 'Cocos (Keeling) Islands',
4864 'CD': 'Congo, the Democratic Republic of the',
4865 'CK': 'Cook Islands',
4867 'CI': 'CĆ“te d\'Ivoire',
4872 'CZ': 'Czech Republic',
4876 'DO': 'Dominican Republic',
4879 'SV': 'El Salvador',
4880 'GQ': 'Equatorial Guinea',
4884 'FK': 'Falkland Islands (Malvinas)',
4885 'FO': 'Faroe Islands',
4889 'GF': 'French Guiana',
4890 'PF': 'French Polynesia',
4891 'TF': 'French Southern Territories',
4906 'GW': 'Guinea-Bissau',
4909 'HM': 'Heard Island and McDonald Islands',
4910 'VA': 'Holy See (Vatican City State)',
4917 'IR': 'Iran, Islamic Republic of',
4920 'IM': 'Isle of Man',
4930 'KP': 'Korea, Democratic People\'s Republic of',
4931 'KR': 'Korea, Republic of',
4934 'LA': 'Lao People\'s Democratic Republic',
4940 'LI': 'Liechtenstein',
4944 'MK': 'Macedonia, the Former Yugoslav Republic of',
4951 'MH': 'Marshall Islands',
4957 'FM': 'Micronesia, Federated States of',
4958 'MD': 'Moldova, Republic of',
4969 'NL': 'Netherlands',
4970 'NC': 'New Caledonia',
4971 'NZ': 'New Zealand',
4976 'NF': 'Norfolk Island',
4977 'MP': 'Northern Mariana Islands',
4982 'PS': 'Palestine, State of',
4984 'PG': 'Papua New Guinea',
4987 'PH': 'Philippines',
4991 'PR': 'Puerto Rico',
4995 'RU': 'Russian Federation',
4997 'BL': 'Saint BarthƩlemy',
4998 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4999 'KN': 'Saint Kitts and Nevis',
5000 'LC': 'Saint Lucia',
5001 'MF': 'Saint Martin (French part)',
5002 'PM': 'Saint Pierre and Miquelon',
5003 'VC': 'Saint Vincent and the Grenadines',
5006 'ST': 'Sao Tome and Principe',
5007 'SA': 'Saudi Arabia',
5011 'SL': 'Sierra Leone',
5013 'SX': 'Sint Maarten (Dutch part)',
5016 'SB': 'Solomon Islands',
5018 'ZA': 'South Africa',
5019 'GS': 'South Georgia and the South Sandwich Islands',
5020 'SS': 'South Sudan',
5025 'SJ': 'Svalbard and Jan Mayen',
5028 'CH': 'Switzerland',
5029 'SY': 'Syrian Arab Republic',
5030 'TW': 'Taiwan, Province of China',
5032 'TZ': 'Tanzania, United Republic of',
5034 'TL': 'Timor-Leste',
5038 'TT': 'Trinidad and Tobago',
5041 'TM': 'Turkmenistan',
5042 'TC': 'Turks and Caicos Islands',
5046 'AE': 'United Arab Emirates',
5047 'GB': 'United Kingdom',
5048 'US': 'United States',
5049 'UM': 'United States Minor Outlying Islands',
5053 'VE': 'Venezuela, Bolivarian Republic of',
5055 'VG': 'Virgin Islands, British',
5056 'VI': 'Virgin Islands, U.S.',
5057 'WF': 'Wallis and Futuna',
5058 'EH': 'Western Sahara',
5065 def short2full(cls, code):
5066 """Convert an ISO 3166-2 country code to the corresponding full name"""
5067 return cls._country_map.get(code.upper())
5070 class GeoUtils(object):
5071 # Major IPv4 address blocks per country
5073 'AD': '46.172.224.0/19',
5074 'AE': '94.200.0.0/13',
5075 'AF': '149.54.0.0/17',
5076 'AG': '209.59.64.0/18',
5077 'AI': '204.14.248.0/21',
5078 'AL': '46.99.0.0/16',
5079 'AM': '46.70.0.0/15',
5080 'AO': '105.168.0.0/13',
5081 'AP': '182.50.184.0/21',
5082 'AQ': '23.154.160.0/24',
5083 'AR': '181.0.0.0/12',
5084 'AS': '202.70.112.0/20',
5085 'AT': '77.116.0.0/14',
5086 'AU': '1.128.0.0/11',
5087 'AW': '181.41.0.0/18',
5088 'AX': '185.217.4.0/22',
5089 'AZ': '5.197.0.0/16',
5090 'BA': '31.176.128.0/17',
5091 'BB': '65.48.128.0/17',
5092 'BD': '114.130.0.0/16',
5094 'BF': '102.178.0.0/15',
5095 'BG': '95.42.0.0/15',
5096 'BH': '37.131.0.0/17',
5097 'BI': '154.117.192.0/18',
5098 'BJ': '137.255.0.0/16',
5099 'BL': '185.212.72.0/23',
5100 'BM': '196.12.64.0/18',
5101 'BN': '156.31.0.0/16',
5102 'BO': '161.56.0.0/16',
5103 'BQ': '161.0.80.0/20',
5104 'BR': '191.128.0.0/12',
5105 'BS': '24.51.64.0/18',
5106 'BT': '119.2.96.0/19',
5107 'BW': '168.167.0.0/16',
5108 'BY': '178.120.0.0/13',
5109 'BZ': '179.42.192.0/18',
5110 'CA': '99.224.0.0/11',
5111 'CD': '41.243.0.0/16',
5112 'CF': '197.242.176.0/21',
5113 'CG': '160.113.0.0/16',
5114 'CH': '85.0.0.0/13',
5115 'CI': '102.136.0.0/14',
5116 'CK': '202.65.32.0/19',
5117 'CL': '152.172.0.0/14',
5118 'CM': '102.244.0.0/14',
5119 'CN': '36.128.0.0/10',
5120 'CO': '181.240.0.0/12',
5121 'CR': '201.192.0.0/12',
5122 'CU': '152.206.0.0/15',
5123 'CV': '165.90.96.0/19',
5124 'CW': '190.88.128.0/17',
5125 'CY': '31.153.0.0/16',
5126 'CZ': '88.100.0.0/14',
5128 'DJ': '197.241.0.0/17',
5129 'DK': '87.48.0.0/12',
5130 'DM': '192.243.48.0/20',
5131 'DO': '152.166.0.0/15',
5132 'DZ': '41.96.0.0/12',
5133 'EC': '186.68.0.0/15',
5134 'EE': '90.190.0.0/15',
5135 'EG': '156.160.0.0/11',
5136 'ER': '196.200.96.0/20',
5137 'ES': '88.0.0.0/11',
5138 'ET': '196.188.0.0/14',
5139 'EU': '2.16.0.0/13',
5140 'FI': '91.152.0.0/13',
5141 'FJ': '144.120.0.0/16',
5142 'FK': '80.73.208.0/21',
5143 'FM': '119.252.112.0/20',
5144 'FO': '88.85.32.0/19',
5146 'GA': '41.158.0.0/15',
5148 'GD': '74.122.88.0/21',
5149 'GE': '31.146.0.0/16',
5150 'GF': '161.22.64.0/18',
5151 'GG': '62.68.160.0/19',
5152 'GH': '154.160.0.0/12',
5153 'GI': '95.164.0.0/16',
5154 'GL': '88.83.0.0/19',
5155 'GM': '160.182.0.0/15',
5156 'GN': '197.149.192.0/18',
5157 'GP': '104.250.0.0/19',
5158 'GQ': '105.235.224.0/20',
5159 'GR': '94.64.0.0/13',
5160 'GT': '168.234.0.0/16',
5161 'GU': '168.123.0.0/16',
5162 'GW': '197.214.80.0/20',
5163 'GY': '181.41.64.0/18',
5164 'HK': '113.252.0.0/14',
5165 'HN': '181.210.0.0/16',
5166 'HR': '93.136.0.0/13',
5167 'HT': '148.102.128.0/17',
5168 'HU': '84.0.0.0/14',
5169 'ID': '39.192.0.0/10',
5170 'IE': '87.32.0.0/12',
5171 'IL': '79.176.0.0/13',
5172 'IM': '5.62.80.0/20',
5173 'IN': '117.192.0.0/10',
5174 'IO': '203.83.48.0/21',
5175 'IQ': '37.236.0.0/14',
5176 'IR': '2.176.0.0/12',
5177 'IS': '82.221.0.0/16',
5178 'IT': '79.0.0.0/10',
5179 'JE': '87.244.64.0/18',
5180 'JM': '72.27.0.0/17',
5181 'JO': '176.29.0.0/16',
5182 'JP': '133.0.0.0/8',
5183 'KE': '105.48.0.0/12',
5184 'KG': '158.181.128.0/17',
5185 'KH': '36.37.128.0/17',
5186 'KI': '103.25.140.0/22',
5187 'KM': '197.255.224.0/20',
5188 'KN': '198.167.192.0/19',
5189 'KP': '175.45.176.0/22',
5190 'KR': '175.192.0.0/10',
5191 'KW': '37.36.0.0/14',
5192 'KY': '64.96.0.0/15',
5193 'KZ': '2.72.0.0/13',
5194 'LA': '115.84.64.0/18',
5195 'LB': '178.135.0.0/16',
5196 'LC': '24.92.144.0/20',
5197 'LI': '82.117.0.0/19',
5198 'LK': '112.134.0.0/15',
5199 'LR': '102.183.0.0/16',
5200 'LS': '129.232.0.0/17',
5201 'LT': '78.56.0.0/13',
5202 'LU': '188.42.0.0/16',
5203 'LV': '46.109.0.0/16',
5204 'LY': '41.252.0.0/14',
5205 'MA': '105.128.0.0/11',
5206 'MC': '88.209.64.0/18',
5207 'MD': '37.246.0.0/16',
5208 'ME': '178.175.0.0/17',
5209 'MF': '74.112.232.0/21',
5210 'MG': '154.126.0.0/17',
5211 'MH': '117.103.88.0/21',
5212 'MK': '77.28.0.0/15',
5213 'ML': '154.118.128.0/18',
5214 'MM': '37.111.0.0/17',
5215 'MN': '49.0.128.0/17',
5216 'MO': '60.246.0.0/16',
5217 'MP': '202.88.64.0/20',
5218 'MQ': '109.203.224.0/19',
5219 'MR': '41.188.64.0/18',
5220 'MS': '208.90.112.0/22',
5221 'MT': '46.11.0.0/16',
5222 'MU': '105.16.0.0/12',
5223 'MV': '27.114.128.0/18',
5224 'MW': '102.70.0.0/15',
5225 'MX': '187.192.0.0/11',
5226 'MY': '175.136.0.0/13',
5227 'MZ': '197.218.0.0/15',
5228 'NA': '41.182.0.0/16',
5229 'NC': '101.101.0.0/18',
5230 'NE': '197.214.0.0/18',
5231 'NF': '203.17.240.0/22',
5232 'NG': '105.112.0.0/12',
5233 'NI': '186.76.0.0/15',
5234 'NL': '145.96.0.0/11',
5235 'NO': '84.208.0.0/13',
5236 'NP': '36.252.0.0/15',
5237 'NR': '203.98.224.0/19',
5238 'NU': '49.156.48.0/22',
5239 'NZ': '49.224.0.0/14',
5240 'OM': '5.36.0.0/15',
5241 'PA': '186.72.0.0/15',
5242 'PE': '186.160.0.0/14',
5243 'PF': '123.50.64.0/18',
5244 'PG': '124.240.192.0/19',
5245 'PH': '49.144.0.0/13',
5246 'PK': '39.32.0.0/11',
5247 'PL': '83.0.0.0/11',
5248 'PM': '70.36.0.0/20',
5249 'PR': '66.50.0.0/16',
5250 'PS': '188.161.0.0/16',
5251 'PT': '85.240.0.0/13',
5252 'PW': '202.124.224.0/20',
5253 'PY': '181.120.0.0/14',
5254 'QA': '37.210.0.0/15',
5255 'RE': '102.35.0.0/16',
5256 'RO': '79.112.0.0/13',
5257 'RS': '93.86.0.0/15',
5258 'RU': '5.136.0.0/13',
5259 'RW': '41.186.0.0/16',
5260 'SA': '188.48.0.0/13',
5261 'SB': '202.1.160.0/19',
5262 'SC': '154.192.0.0/11',
5263 'SD': '102.120.0.0/13',
5264 'SE': '78.64.0.0/12',
5265 'SG': '8.128.0.0/10',
5266 'SI': '188.196.0.0/14',
5267 'SK': '78.98.0.0/15',
5268 'SL': '102.143.0.0/17',
5269 'SM': '89.186.32.0/19',
5270 'SN': '41.82.0.0/15',
5271 'SO': '154.115.192.0/18',
5272 'SR': '186.179.128.0/17',
5273 'SS': '105.235.208.0/21',
5274 'ST': '197.159.160.0/19',
5275 'SV': '168.243.0.0/16',
5276 'SX': '190.102.0.0/20',
5278 'SZ': '41.84.224.0/19',
5279 'TC': '65.255.48.0/20',
5280 'TD': '154.68.128.0/19',
5281 'TG': '196.168.0.0/14',
5282 'TH': '171.96.0.0/13',
5283 'TJ': '85.9.128.0/18',
5284 'TK': '27.96.24.0/21',
5285 'TL': '180.189.160.0/20',
5286 'TM': '95.85.96.0/19',
5287 'TN': '197.0.0.0/11',
5288 'TO': '175.176.144.0/21',
5289 'TR': '78.160.0.0/11',
5290 'TT': '186.44.0.0/15',
5291 'TV': '202.2.96.0/19',
5292 'TW': '120.96.0.0/11',
5293 'TZ': '156.156.0.0/14',
5294 'UA': '37.52.0.0/14',
5295 'UG': '102.80.0.0/13',
5297 'UY': '167.56.0.0/13',
5298 'UZ': '84.54.64.0/18',
5299 'VA': '212.77.0.0/19',
5300 'VC': '207.191.240.0/21',
5301 'VE': '186.88.0.0/13',
5302 'VG': '66.81.192.0/20',
5303 'VI': '146.226.0.0/16',
5304 'VN': '14.160.0.0/11',
5305 'VU': '202.80.32.0/20',
5306 'WF': '117.20.32.0/21',
5307 'WS': '202.4.32.0/19',
5308 'YE': '134.35.0.0/16',
5309 'YT': '41.242.116.0/22',
5310 'ZA': '41.0.0.0/11',
5311 'ZM': '102.144.0.0/13',
5312 'ZW': '102.177.192.0/18',
5316 def random_ipv4(cls, code_or_block):
5317 if len(code_or_block) == 2:
5318 block = cls._country_ip_map.get(code_or_block.upper())
5322 block = code_or_block
5323 addr, preflen = block.split('/')
5324 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5325 addr_max = addr_min | (0xffffffff >> int(preflen))
5326 return compat_str(socket.inet_ntoa(
5327 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets each request override the configured proxy
    via a 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # a per-request proxy (set by the caller) beats the handler-wide one
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # hand SOCKS proxies over via a header instead of urllib
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5355 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5356 # released into Public Domain
5357 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    # emit 32 bits at a time, most significant chunk first
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes. this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    # left-pad to a multiple of 4 so we can unpack 32 bits at a time
    if length % 4:
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    """Encrypt *data* with OHDave's RSA variant (http://www.ohdave.com/rsa/).

    data is a bytes-like object; exponent/modulus are the RSA parameters
    e and N (both integers). Returns the ciphertext as a lowercase hex
    string. Limitation: supports one block encryption only.
    """
    # the payload is interpreted little-endian, hence the reversal
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # PKCS#1 v1.5 (RFC 8017, EME-PKCS1-v1_5) requires the PS filler
    # octets to be NONZERO: a zero byte marks the end of the padding, so
    # the previous randint(0, 254) could truncate the message on decrypt.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Render non-negative *num* in base *n*, using *table* as digits."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Undo the common P.A.C.K.E.R.-style JS obfuscation."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # map each base-n token back to its original symbol (or itself)
    symbol_table = {}
    for index in reversed(range(count)):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Shift every character of *s* found in *alphabet* by *shift*
    positions (wrapping around); other characters pass through."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        if ch not in alphabet:
            return ch
        return alphabet[(alphabet.index(ch) + shift) % size]

    return ''.join(map(rotate, s))
def rot47(s):
    # ROT47: caesar-shift the printable ASCII range '!'..'~' by 47
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('K=v,K2="v,2",...') into a dict."""
    pattern = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    parsed = {}
    for key, raw in re.findall(pattern, attrib):
        # quoted values may contain commas; strip the quotes themselves
        parsed[key] = raw[1:-1] if raw.startswith('"') else raw
    return parsed
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator)."""
    if val >= 0:
        return val >> n
    # negative values are reinterpreted as their 32-bit unsigned form
    return (val + 0x100000000) >> n
5500 # Based on png2str() written by @gdkchan and improved by @yokrysty
5501 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5502 def decode_png(png_data
):
5503 # Reference: https://www.w3.org/TR/PNG/
5504 header
= png_data
[8:]
5506 if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR':
5507 raise IOError('Not a valid PNG file.')
5509 int_map
= {1: '>B', 2: '>H', 4: '>I'}
5510 unpack_integer
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0]
5515 length
= unpack_integer(header
[:4])
5518 chunk_type
= header
[:4]
5521 chunk_data
= header
[:length
]
5522 header
= header
[length
:]
5524 header
= header
[4:] # Skip CRC
5532 ihdr
= chunks
[0]['data']
5534 width
= unpack_integer(ihdr
[:4])
5535 height
= unpack_integer(ihdr
[4:8])
5539 for chunk
in chunks
:
5540 if chunk
['type'] == b
'IDAT':
5541 idat
+= chunk
['data']
5544 raise IOError('Unable to read PNG data.')
5546 decompressed_data
= bytearray(zlib
.decompress(idat
))
5551 def _get_pixel(idx
):
5556 for y
in range(height
):
5557 basePos
= y
* (1 + stride
)
5558 filter_type
= decompressed_data
[basePos
]
5562 pixels
.append(current_row
)
5564 for x
in range(stride
):
5565 color
= decompressed_data
[1 + basePos
+ x
]
5566 basex
= y
* stride
+ x
5571 left
= _get_pixel(basex
- 3)
5573 up
= _get_pixel(basex
- stride
)
5575 if filter_type
== 1: # Sub
5576 color
= (color
+ left
) & 0xff
5577 elif filter_type
== 2: # Up
5578 color
= (color
+ up
) & 0xff
5579 elif filter_type
== 3: # Average
5580 color
= (color
+ ((left
+ up
) >> 1)) & 0xff
5581 elif filter_type
== 4: # Paeth
5587 c
= _get_pixel(basex
- stride
- 3)
5595 if pa
<= pb
and pa
<= pc
:
5596 color
= (color
+ a
) & 0xff
5598 color
= (color
+ b
) & 0xff
5600 color
= (color
+ c
) & 0xff
5602 current_row
.append(color
)
5604 return width
, height
, pixels
5607 def write_xattr(path
, key
, value
):
5608 # This mess below finds the best xattr tool for the job
5610 # try the pyxattr module...
5613 if hasattr(xattr
, 'set'): # pyxattr
5614 # Unicode arguments are not supported in python-pyxattr until
5616 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5617 pyxattr_required_version
= '0.5.0'
5618 if version_tuple(xattr
.__version
__) < version_tuple(pyxattr_required_version
):
5619 # TODO: fallback to CLI tools
5620 raise XAttrUnavailableError(
5621 'python-pyxattr is detected but is too old. '
5622 'youtube-dl requires %s or above while your version is %s. '
5623 'Falling back to other xattr implementations' % (
5624 pyxattr_required_version
, xattr
.__version
__))
5626 setxattr
= xattr
.set
5628 setxattr
= xattr
.setxattr
5631 setxattr(path
, key
, value
)
5632 except EnvironmentError as e
:
5633 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5636 if compat_os_name
== 'nt':
5637 # Write xattrs to NTFS Alternate Data Streams:
5638 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5639 assert ':' not in key
5640 assert os
.path
.exists(path
)
5642 ads_fn
= path
+ ':' + key
5644 with open(ads_fn
, 'wb') as f
:
5646 except EnvironmentError as e
:
5647 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5649 user_has_setfattr
= check_executable('setfattr', ['--version'])
5650 user_has_xattr
= check_executable('xattr', ['-h'])
5652 if user_has_setfattr
or user_has_xattr
:
5654 value
= value
.decode('utf-8')
5655 if user_has_setfattr
:
5656 executable
= 'setfattr'
5657 opts
= ['-n', key
, '-v', value
]
5658 elif user_has_xattr
:
5659 executable
= 'xattr'
5660 opts
= ['-w', key
, value
]
5662 cmd
= ([encodeFilename(executable
, True)]
5663 + [encodeArgument(o
) for o
in opts
]
5664 + [encodeFilename(path
, True)])
5667 p
= subprocess
.Popen(
5668 cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
5669 except EnvironmentError as e
:
5670 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5671 stdout
, stderr
= p
.communicate()
5672 stderr
= stderr
.decode('utf-8', 'replace')
5673 if p
.returncode
!= 0:
5674 raise XAttrMetadataError(p
.returncode
, stderr
)
5677 # On Unix, and can't find pyxattr, setfattr, or xattr.
5678 if sys
.platform
.startswith('linux'):
5679 raise XAttrUnavailableError(
5680 "Couldn't find a tool to set the xattrs. "
5681 "Install either the python 'pyxattr' or 'xattr' "
5682 "modules, or the GNU 'attr' package "
5683 "(which contains the 'setfattr' tool).")
5685 raise XAttrUnavailableError(
5686 "Couldn't find a tool to set the xattrs. "
5687 "Install either the python 'xattr' module, "
5688 "or the 'xattr' binary.")
5691 def random_birthday(year_field
, month_field
, day_field
):
5692 start_date
= datetime
.date(1950, 1, 1)
5693 end_date
= datetime
.date(1995, 12, 31)
5694 offset
= random
.randint(0, (end_date
- start_date
).days
)
5695 random_date
= start_date
+ datetime
.timedelta(offset
)
5697 year_field
: str(random_date
.year
),
5698 month_field
: str(random_date
.month
),
5699 day_field
: str(random_date
.day
),