4 from __future__
import unicode_literals
34 import xml
.etree
.ElementTree
38 compat_HTMLParseError
,
43 compat_ctypes_WINFUNCTYPE
,
44 compat_etree_fromstring
,
47 compat_html_entities_html5
,
58 compat_urllib_parse_urlencode
,
59 compat_urllib_parse_urlparse
,
60 compat_urllib_parse_unquote_plus
,
61 compat_urllib_request
,
def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    known_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in known_schemes:
            known_schemes.append(socks_scheme)
81 # This is not clearly defined otherwise
82 compiled_regex_type
= type(re
.compile(''))
85 def random_user_agent():
86 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1665 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1669 'User-Agent': random_user_agent(),
1670 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1671 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1672 'Accept-Encoding': 'gzip, deflate',
1673 'Accept-Language': 'en-us,en;q=0.5',
1678 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1682 NO_DEFAULT
= object()
1684 ENGLISH_MONTH_NAMES
= [
1685 'January', 'February', 'March', 'April', 'May', 'June',
1686 'July', 'August', 'September', 'October', 'November', 'December']
1689 'en': ENGLISH_MONTH_NAMES
,
1691 'janvier', 'fƩvrier', 'mars', 'avril', 'mai', 'juin',
1692 'juillet', 'aoƻt', 'septembre', 'octobre', 'novembre', 'dƩcembre'],
1695 KNOWN_EXTENSIONS
= (
1696 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1697 'flv', 'f4v', 'f4a', 'f4b',
1698 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1699 'mkv', 'mka', 'mk3d',
1702 'asf', 'wmv', 'wma',
1708 'f4f', 'f4m', 'm3u8', 'smil')
1710 # needed for sanitizing filenames in restricted mode
1711 ACCENT_CHARS
= dict(zip('ĆĆĆĆĆĆ
ĆĆĆĆĆĆĆĆĆĆĆĆĆĆĆĆĆÅĆÅĆĆĆĆÅ°ĆĆĆĆ Ć”Ć¢Ć£Ć¤Ć„Ć¦Ć§ĆØĆ©ĆŖƫƬĆĆ®ĆÆĆ°Ć±Ć²Ć³Ć“ĆµĆ¶ÅĆøÅĆ¹ĆŗĆ»Ć¼Å±Ć½Ć¾Ćæ',
1712 itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1713 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1733 '%Y/%m/%d %H:%M:%S',
1735 '%Y-%m-%d %H:%M:%S',
1736 '%Y-%m-%d %H:%M:%S.%f',
1739 '%Y-%m-%dT%H:%M:%SZ',
1740 '%Y-%m-%dT%H:%M:%S.%fZ',
1741 '%Y-%m-%dT%H:%M:%S.%f0Z',
1742 '%Y-%m-%dT%H:%M:%S',
1743 '%Y-%m-%dT%H:%M:%S.%f',
1745 '%b %d %Y at %H:%M',
1746 '%b %d %Y at %H:%M:%S',
1747 '%B %d %Y at %H:%M',
1748 '%B %d %Y at %H:%M:%S',
1751 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1752 DATE_FORMATS_DAY_FIRST
.extend([
1758 '%d/%m/%Y %H:%M:%S',
1761 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1762 DATE_FORMATS_MONTH_FIRST
.extend([
1767 '%m/%d/%Y %H:%M:%S',
1770 PACKED_CODES_RE
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1771 JSON_LD_RE
= r
'(?is)<script[^>]+type=(["\']?
)application
/ld\
+json\
1[^
>]*>(?P
<json_ld
>.+?
)</script
>'
1774 def preferredencoding():
1775 """Get preferred encoding.
1777 Returns the best encoding scheme for the system, based on
1778 locale.getpreferredencoding() and some further tweaks.
1781 pref = locale.getpreferredencoding()
1789 def write_json_file(obj, fn):
1790 """ Encode obj as JSON and write it to fn, atomically if possible """
1792 fn = encodeFilename(fn)
1793 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1794 encoding = get_filesystem_encoding()
1795 # os.path.basename returns a bytes object, but NamedTemporaryFile
1796 # will fail if the filename contains non ascii characters unless we
1797 # use a unicode object
1798 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1799 # the same for os.path.dirname
1800 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1802 path_basename = os.path.basename
1803 path_dirname = os.path.dirname
1807 'prefix
': path_basename(fn) + '.',
1808 'dir': path_dirname(fn),
1812 # In Python 2.x, json.dump expects a bytestream.
1813 # In Python 3.x, it writes to a character stream
1814 if sys.version_info < (3, 0):
1819 'encoding
': 'utf
-8',
1822 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1827 if sys.platform == 'win32
':
1828 # Need to remove existing file on Windows, else os.rename raises
1829 # WindowsError or FileExistsError.
1834 os.rename(tf.name, fn)
1843 if sys.version_info >= (2, 7):
1844 def find_xpath_attr(node, xpath, key, val=None):
1845 """ Find the xpath xpath[@key=val] """
1846 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1847 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1848 return node.find(expr)
1850 def find_xpath_attr(node, xpath, key, val=None):
1851 for f in node.findall(compat_xpath(xpath)):
1852 if key not in f.attrib:
1854 if val is None or f.attrib.get(key) == val:
1858 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1859 # the namespace parameter
1862 def xpath_with_ns(path
, ns_map
):
1863 components
= [c
.split(':') for c
in path
.split('/')]
1865 for c
in components
:
1867 replaced
.append(c
[0])
1870 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1871 return '/'.join(replaced
)
1874 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1875 def _find_xpath(xpath
):
1876 return node
.find(compat_xpath(xpath
))
1878 if isinstance(xpath
, (str, compat_str
)):
1879 n
= _find_xpath(xpath
)
1887 if default
is not NO_DEFAULT
:
1890 name
= xpath
if name
is None else name
1891 raise ExtractorError('Could not find XML element %s' % name
)
1897 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1898 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1899 if n
is None or n
== default
:
1902 if default
is not NO_DEFAULT
:
1905 name
= xpath
if name
is None else name
1906 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1912 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1913 n
= find_xpath_attr(node
, xpath
, key
)
1915 if default
is not NO_DEFAULT
:
1918 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1919 raise ExtractorError('Could not find XML attribute %s' % name
)
1922 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Delegate to the generic attribute matcher, looking for attribute id="...".
    content = get_element_by_attribute('id', id, html)
    return content
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if matches:
        return matches[0]
    return None
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class name as a whole word anywhere inside the (possibly
    # multi-valued) class attribute; the pattern itself must not be escaped.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1948 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1949 """Return the content of the tag with the specified attribute in the passed HTML document"""
1951 value = re.escape(value) if escape_value else value
1954 for m in re.finditer(r'''(?xs)
1956 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1958 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1962 ''' % (re.escape(attribute), value), html):
1963 res = m.group('content
')
1965 if res.startswith('"') or res.startswith("'"):
1968 retlist.append(unescapeHTML(res))
1973 class HTMLAttributeParser(compat_HTMLParser):
1974 """Trivial HTML parser to gather the attributes for a single element"""
1977 compat_HTMLParser.__init__(self)
1979 def handle_starttag(self, tag, attrs):
1980 self.attrs = dict(attrs)
1983 def extract_attributes(html_element):
1984 """Given a string for an HTML element such as
1986 a="foo" B="bar" c="&98;az" d=boz
1987 empty= noval entity="&"
1990 Decode and return a dictionary of attributes.
1992 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
1993 'empty
': '', 'noval
': None, 'entity
': '&',
1994 'sq
': '"', 'dq': '\''
1996 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
1997 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
1999 parser = HTMLAttributeParser()
2001 parser.feed(html_element)
2003 # Older Python may throw HTMLParseError in case of malformed HTML
2004 except compat_HTMLParseError:
2009 def clean_html(html):
2010 """Clean an HTML snippet into a readable string"""
2012 if html is None: # Convenience for sanitizing descriptions etc.
2016 html = html.replace('\n', ' ')
2017 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2018 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2020 html = re.sub('<.*?>', '', html)
2021 # Replace html entities
2022 html = unescapeHTML(html)
2026 def sanitize_open(filename, open_mode):
2027 """Try to open the given filename, and slightly tweak it if this fails.
2029 Attempts to open the given filename. If this fails, it tries to change
2030 the filename slightly, step by step, until it's either able to open it
2031 or it fails and raises a final exception, like the standard open()
2034 It returns the tuple (stream, definitive_file_name).
2038 if sys.platform == 'win32':
2040 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2041 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2042 stream = open(encodeFilename(filename), open_mode)
2043 return (stream, filename)
2044 except (IOError, OSError) as err:
2045 if err.errno in (errno.EACCES,):
2048 # In case of error, try to remove win32 forbidden chars
2049 alt_filename = sanitize_path(filename)
2050 if alt_filename == filename:
2053 # An exception here should be caught in the caller
2054 stream = open(encodeFilename(alt_filename), open_mode)
2055 return (stream, alt_filename)
2058 def timeconvert(timestr):
2059 """Convert RFC 2822 defined time string into system timestamp"""
2061 timetuple = email.utils.parsedate_tz(timestr)
2062 if timetuple is not None:
2063 timestamp = email.utils.mktime_tz(timetuple)
2067 def sanitize_filename(s, restricted=False, is_id=False):
2068 """Sanitizes a string so it could be used as part of a filename.
2069 If restricted is set, use a stricter subset of allowed characters.
2070 Set is_id if this is not an arbitrary string, but an ID that should be kept
2073 def replace_insane(char):
2074 if restricted and char in ACCENT_CHARS:
2075 return ACCENT_CHARS[char]
2076 if char == '?' or ord(char) < 32 or ord(char) == 127:
2079 return '' if restricted else '\''
2081 return '_
-' if restricted else ' -'
2082 elif char in '\\/|
*<>':
2084 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2086 if restricted
and ord(char
) > 127:
2091 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2092 result
= ''.join(map(replace_insane
, s
))
2094 while '__' in result
:
2095 result
= result
.replace('__', '_')
2096 result
= result
.strip('_')
2097 # Common case of "Foreign band name - English song title"
2098 if restricted
and result
.startswith('-_'):
2100 if result
.startswith('-'):
2101 result
= '_' + result
[len('-'):]
2102 result
= result
.lstrip('.')
2108 def sanitize_path(s
):
2109 """Sanitizes and normalizes path on Windows"""
2110 if sys
.platform
!= 'win32':
2112 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2113 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2114 drive_or_unc
, _
= os
.path
.splitunc(s
)
2115 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2119 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2120 for path_part
in norm_path
]
2122 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2123 return os
.path
.join(*sanitized_path
)
2126 def sanitize_url(url
):
2127 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2128 # the number of unwanted failures due to missing protocol
2129 if url
.startswith('//'):
2130 return 'http:%s' % url
2131 # Fix some common typos seen so far
2133 # https://github.com/ytdl-org/youtube-dl/issues/15649
2134 (r
'^httpss://', r
'https://'),
2135 # https://bx1.be/lives/direct-tv/
2136 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2138 for mistake
, fixup
in COMMON_TYPOS
:
2139 if re
.match(mistake
, url
):
2140 return re
.sub(mistake
, fixup
, url
)
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request, normalizing the URL through sanitize_url() first."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2149 """Expand shell variables and ~"""
2150 return os
.path
.expandvars(compat_expanduser(s
))
2153 def orderedSet(iterable
):
2154 """ Remove all duplicates from the input iterable """
2162 def _htmlentity_transform(entity_with_semicolon
):
2163 """Transforms an HTML entity to a character."""
2164 entity
= entity_with_semicolon
[:-1]
2166 # Known non-numeric HTML entity
2167 if entity
in compat_html_entities
.name2codepoint
:
2168 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2170 # TODO: HTML5 allows entities without a semicolon. For example,
2171 # 'Éric' should be decoded as 'Ćric'.
2172 if entity_with_semicolon
in compat_html_entities_html5
:
2173 return compat_html_entities_html5
[entity_with_semicolon
]
2175 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2176 if mobj
is not None:
2177 numstr
= mobj
.group(1)
2178 if numstr
.startswith('x'):
2180 numstr
= '0%s' % numstr
2183 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2185 return compat_chr(int(numstr
, base
))
2189 # Unknown entity in name, return its literal representation
2190 return '&%s;' % entity
2193 def unescapeHTML(s
):
2196 assert type(s
) == compat_str
2199 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2202 def get_subprocess_encoding():
2203 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2204 # For subprocess calls, encode with locale encoding
2205 # Refer to http://stackoverflow.com/a/9951851/35070
2206 encoding
= preferredencoding()
2208 encoding
= sys
.getfilesystemencoding()
2209 if encoding
is None:
2214 def encodeFilename(s
, for_subprocess
=False):
2216 @param s The name of the file
2219 assert type(s
) == compat_str
2221 # Python 3 has a Unicode API
2222 if sys
.version_info
>= (3, 0):
2225 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2226 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2227 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2228 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2231 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2232 if sys
.platform
.startswith('java'):
2235 return s
.encode(get_subprocess_encoding(), 'ignore')
2238 def decodeFilename(b
, for_subprocess
=False):
2240 if sys
.version_info
>= (3, 0):
2243 if not isinstance(b
, bytes):
2246 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a value for use as a subprocess argument (see encodeFilename)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
def decodeArgument(b):
    """Decode a subprocess argument; inverse of encodeArgument()."""
    return decodeFilename(b, for_subprocess=True)
2262 def decodeOption(optval
):
2265 if isinstance(optval
, bytes):
2266 optval
= optval
.decode(preferredencoding())
2268 assert isinstance(optval
, compat_str
)
2272 def formatSeconds(secs
):
2274 return '%d:%02d:%02d' % (secs
// 3600, (secs
% 3600) // 60, secs
% 60)
2276 return '%d:%02d' % (secs
// 60, secs
% 60)
2281 def make_HTTPS_handler(params
, **kwargs
):
2282 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2283 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2284 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2285 if opts_no_check_certificate
:
2286 context
.check_hostname
= False
2287 context
.verify_mode
= ssl
.CERT_NONE
2289 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2292 # (create_default_context present but HTTPSHandler has no context=)
2295 if sys
.version_info
< (3, 2):
2296 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2297 else: # Python < 3.4
2298 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2299 context
.verify_mode
= (ssl
.CERT_NONE
2300 if opts_no_check_certificate
2301 else ssl
.CERT_REQUIRED
)
2302 context
.set_default_verify_paths()
2303 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2306 def bug_reports_message():
2307 if ytdl_is_updateable():
2308 update_cmd
= 'type youtube-dl -U to update'
2310 update_cmd
= 'see https://yt-dl.org/update on how to update'
2311 msg
= '; please report this issue on https://yt-dl.org/bug .'
2312 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2313 msg
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors; all project exceptions derive from it."""
2322 class ExtractorError(YoutubeDLError
):
2323 """Error during info extraction."""
2325 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
2326 """ tb, if given, is the original traceback (so that it can be printed out).
2327 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
2330 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
2332 if video_id
is not None:
2333 msg
= video_id
+ ': ' + msg
2335 msg
+= ' (caused by %r)' % cause
2337 msg
+= bug_reports_message()
2338 super(ExtractorError
, self
).__init
__(msg
)
2341 self
.exc_info
= sys
.exc_info() # preserve original exception
2343 self
.video_id
= video_id
2345 def format_traceback(self
):
2346 if self
.traceback
is None:
2348 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL.

    Marked as `expected` so it is reported as a normal error rather than a bug.
    """

    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # expected=True: geo blocks are a site condition, not a youtube-dl bug
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # Optional geo info supplied by the caller (presumably the list of
        # countries the video is restricted to — confirm against callers)
        self.countries = countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Original (exc_type, exc_value, traceback) triple, or None
        self.exc_info = exc_info
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Number of bytes actually received
        self.downloaded = downloaded
        # Number of bytes the server announced
        self.expected = expected
2441 class XAttrMetadataError(YoutubeDLError
):
2442 def __init__(self
, code
=None, msg
='Unknown error'):
2443 super(XAttrMetadataError
, self
).__init
__(msg
)
2447 # Parsing code and msg
2448 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2449 or 'No space left' in self
.msg
or 'Disk quota excedded' in self
.msg
):
2450 self
.reason
= 'NO_SPACE'
2451 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2452 self
.reason
= 'VALUE_TOO_LONG'
2454 self
.reason
= 'NOT_SUPPORTED'
class XAttrUnavailableError(YoutubeDLError):
    """Raised when extended-attribute support is not available."""
2461 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2462 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2463 # expected HTTP responses to meet HTTP/1.0 or later (see also
2464 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2465 if sys
.version_info
< (3, 0):
2466 kwargs
['strict'] = True
2467 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2468 source_address
= ydl_handler
._params
.get('source_address')
2470 if source_address
is not None:
2471 # This is to workaround _create_connection() from socket where it will try all
2472 # address data from getaddrinfo() including IPv6. This filters the result from
2473 # getaddrinfo() based on the source_address value.
2474 # This is based on the cpython socket.create_connection() function.
2475 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2476 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2477 host
, port
= address
2479 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2480 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2481 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2482 if addrs
and not ip_addrs
:
2483 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2485 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2486 % (ip_version
, source_address
[0]))
2487 for res
in ip_addrs
:
2488 af
, socktype
, proto
, canonname
, sa
= res
2491 sock
= socket
.socket(af
, socktype
, proto
)
2492 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2493 sock
.settimeout(timeout
)
2494 sock
.bind(source_address
)
2496 err
= None # Explicitly break reference cycle
2498 except socket
.error
as _
:
2500 if sock
is not None:
2505 raise socket
.error('getaddrinfo returns an empty list')
2506 if hasattr(hc
, '_create_connection'):
2507 hc
._create
_connection
= _create_connection
2508 sa
= (source_address
, 0)
2509 if hasattr(hc
, 'source_address'): # Python 2.7+
2510 hc
.source_address
= sa
2512 def _hc_connect(self
, *args
, **kwargs
):
2513 sock
= _create_connection(
2514 (self
.host
, self
.port
), self
.timeout
, sa
)
2516 self
.sock
= ssl
.wrap_socket(
2517 sock
, self
.key_file
, self
.cert_file
,
2518 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2521 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Drop the internal 'Youtubedl-no-compression' marker header.

    If the marker is present, a filtered copy of the headers is returned
    with both the marker and any 'Accept-Encoding' header (matched
    case-insensitively) removed; otherwise the mapping is returned as-is.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2536 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2537 """Handler for HTTP requests and responses.
2539 This class, when installed with an OpenerDirector, automatically adds
2540 the standard headers to every HTTP request and handles gzipped and
2541 deflated responses from web servers. If compression is to be avoided in
2542 a particular request, the original request in the program code only has
2543 to include the HTTP header "Youtubedl-no-compression", which will be
2544 removed before making the real request.
2546 Part of this code was copied from:
2548 http://techknack.net/python-urllib2-handlers/
2550 Andrew Rowls, the author of that code, agreed to release it to the
2554 def __init__(self
, params
, *args
, **kwargs
):
2555 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
2556 self
._params
= params
2558 def http_open(self
, req
):
2559 conn_class
= compat_http_client
.HTTPConnection
2561 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2563 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2564 del req
.headers
['Ytdl-socks-proxy']
2566 return self
.do_open(functools
.partial(
2567 _create_http_connection
, self
, conn_class
, False),
2573 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2575 return zlib
.decompress(data
)
2577 def http_request(self
, req
):
2578 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2579 # always respected by websites, some tend to give out URLs with non percent-encoded
2580 # non-ASCII characters (see telemb.py, ard.py [#3412])
2581 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2582 # To work around aforementioned issue we will replace request's original URL with
2583 # percent-encoded one
2584 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2585 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2586 url
= req
.get_full_url()
2587 url_escaped
= escape_url(url
)
2589 # Substitute URL if any change after escaping
2590 if url
!= url_escaped
:
2591 req
= update_Request(req
, url
=url_escaped
)
2593 for h
, v
in std_headers
.items():
2594 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2595 # The dict keys are capitalized because of this bug by urllib
2596 if h
.capitalize() not in req
.headers
:
2597 req
.add_header(h
, v
)
2599 req
.headers
= handle_youtubedl_headers(req
.headers
)
2601 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2602 # Python 2.6 is brain-dead when it comes to fragments
2603 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2604 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2608 def http_response(self
, req
, resp
):
2611 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2612 content
= resp
.read()
2613 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2615 uncompressed
= io
.BytesIO(gz
.read())
2616 except IOError as original_ioerror
:
2617 # There may be junk add the end of the file
2618 # See http://stackoverflow.com/q/4928560/35070 for details
2619 for i
in range(1, 1024):
2621 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2622 uncompressed
= io
.BytesIO(gz
.read())
2627 raise original_ioerror
2628 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2629 resp
.msg
= old_resp
.msg
2630 del resp
.headers
['Content-encoding']
2632 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2633 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2634 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2635 resp
.msg
= old_resp
.msg
2636 del resp
.headers
['Content-encoding']
2637 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2638 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2639 if 300 <= resp
.code
< 400:
2640 location
= resp
.headers
.get('Location')
2642 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2643 if sys
.version_info
>= (3, 0):
2644 location
= location
.encode('iso-8859-1').decode('utf-8')
2646 location
= location
.decode('utf-8')
2647 location_escaped
= escape_url(location
)
2648 if location
!= location_escaped
:
2649 del resp
.headers
['Location']
2650 if sys
.version_info
< (3, 0):
2651 location_escaped
= location_escaped
.encode('utf-8')
2652 resp
.headers
['Location'] = location_escaped
2655 https_request
= http_request
2656 https_response
= http_response
2659 def make_socks_conn_class(base_class
, socks_proxy
):
2660 assert issubclass(base_class
, (
2661 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2663 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2664 if url_components
.scheme
.lower() == 'socks5':
2665 socks_type
= ProxyType
.SOCKS5
2666 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2667 socks_type
= ProxyType
.SOCKS4
2668 elif url_components
.scheme
.lower() == 'socks4a':
2669 socks_type
= ProxyType
.SOCKS4A
2671 def unquote_if_non_empty(s
):
2674 return compat_urllib_parse_unquote_plus(s
)
2678 url_components
.hostname
, url_components
.port
or 1080,
2680 unquote_if_non_empty(url_components
.username
),
2681 unquote_if_non_empty(url_components
.password
),
2684 class SocksConnection(base_class
):
2686 self
.sock
= sockssocket()
2687 self
.sock
.setproxy(*proxy_args
)
2688 if type(self
.timeout
) in (int, float):
2689 self
.sock
.settimeout(self
.timeout
)
2690 self
.sock
.connect((self
.host
, self
.port
))
2692 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2693 if hasattr(self
, '_context'): # Python > 2.6
2694 self
.sock
= self
._context
.wrap_socket(
2695 self
.sock
, server_hostname
=self
.host
)
2697 self
.sock
= ssl
.wrap_socket(self
.sock
)
2699 return SocksConnection
2702 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2703 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2704 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2705 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2706 self
._params
= params
2708 def https_open(self
, req
):
2710 conn_class
= self
._https
_conn
_class
2712 if hasattr(self
, '_context'): # python > 2.6
2713 kwargs
['context'] = self
._context
2714 if hasattr(self
, '_check_hostname'): # python 3.x
2715 kwargs
['check_hostname'] = self
._check
_hostname
2717 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2719 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2720 del req
.headers
['Ytdl-socks-proxy']
2722 return self
.do_open(functools
.partial(
2723 _create_http_connection
, self
, conn_class
, True),
2727 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2728 _HTTPONLY_PREFIX
= '#HttpOnly_'
2730 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2731 # Store session cookies with `expires` set to 0 instead of an empty
2734 if cookie
.expires
is None:
2736 compat_cookiejar
.MozillaCookieJar
.save(self
, filename
, ignore_discard
, ignore_expires
)
2738 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2739 """Load cookies from a file."""
2740 if filename
is None:
2741 if self
.filename
is not None:
2742 filename
= self
.filename
2744 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2747 with open(filename
) as f
:
2749 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2750 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2751 cf
.write(compat_str(line
))
2753 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2754 # Session cookies are denoted by either `expires` field set to
2755 # an empty string or 0. MozillaCookieJar only recognizes the former
2756 # (see [1]). So we need force the latter to be recognized as session
2757 # cookies on our own.
2758 # Session cookies may be important for cookies-based authentication,
2759 # e.g. usually, when user does not check 'Remember me' check box while
2760 # logging in on a site, some important cookies are stored as session
2761 # cookies so that not recognizing them will result in failed login.
2762 # 1. https://bugs.python.org/issue17164
2764 # Treat `expires=0` cookies as session cookies
2765 if cookie
.expires
== 0:
2766 cookie
.expires
= None
2767 cookie
.discard
= True
2770 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
):
2771 def __init__(self
, cookiejar
=None):
2772 compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
2774 def http_response(self
, request
, response
):
2775 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2776 # characters in Set-Cookie HTTP header of last response (see
2777 # https://github.com/ytdl-org/youtube-dl/issues/6769).
2778 # In order to at least prevent crashing we will percent encode Set-Cookie
2779 # header before HTTPCookieProcessor starts processing it.
2780 # if sys.version_info < (3, 0) and response.headers:
2781 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2782 # set_cookie = response.headers.get(set_cookie_header)
2784 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2785 # if set_cookie != set_cookie_escaped:
2786 # del response.headers[set_cookie_header]
2787 # response.headers[set_cookie_header] = set_cookie_escaped
2788 return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
)
2790 https_request
= compat_urllib_request
.HTTPCookieProcessor
.http_request
2791 https_response
= http_response
2794 def extract_timezone(date_str
):
2796 r
'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2799 timezone
= datetime
.timedelta()
2801 date_str
= date_str
[:-len(m
.group('tz'))]
2802 if not m
.group('sign'):
2803 timezone
= datetime
.timedelta()
2805 sign
= 1 if m
.group('sign') == '+' else -1
2806 timezone
= datetime
.timedelta(
2807 hours
=sign
* int(m
.group('hours')),
2808 minutes
=sign
* int(m
.group('minutes')))
2809 return timezone
, date_str
2812 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
2813 """ Return a UNIX timestamp from the given date """
2815 if date_str
is None:
2818 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
2820 if timezone
is None:
2821 timezone
, date_str
= extract_timezone(date_str
)
2824 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
2825 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
2826 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Return the list of date-format strings to try, ordered by whether
    the day or the month is expected to come first in the input."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2835 def unified_strdate(date_str
, day_first
=True):
2836 """Return a string with the date in the format YYYYMMDD"""
2838 if date_str
is None:
2842 date_str
= date_str
.replace(',', ' ')
2843 # Remove AM/PM + timezone
2844 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
2845 _
, date_str
= extract_timezone(date_str
)
2847 for expression
in date_formats(day_first
):
2849 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
2852 if upload_date
is None:
2853 timetuple
= email
.utils
.parsedate_tz(date_str
)
2856 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
2859 if upload_date
is not None:
2860 return compat_str(upload_date
)
2863 def unified_timestamp(date_str
, day_first
=True):
2864 if date_str
is None:
2867 date_str
= re
.sub(r
'[,|]', '', date_str
)
2869 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
2870 timezone
, date_str
= extract_timezone(date_str
)
2872 # Remove AM/PM + timezone
2873 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
2875 # Remove unrecognized timezones from ISO 8601 alike timestamps
2876 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
2878 date_str
= date_str
[:-len(m
.group('tz'))]
2880 # Python only supports microseconds, so remove nanoseconds
2881 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
2883 date_str
= m
.group(1)
2885 for expression
in date_formats(day_first
):
2887 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
2888 return calendar
.timegm(dt
.timetuple())
2891 timetuple
= email
.utils
.parsedate_tz(date_str
)
2893 return calendar
.timegm(timetuple
) + pm_delta
* 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media file extension from a URL.

    Falls back to default_ext when the URL is None, has no dot, or the
    candidate extension does not look like one.
    """
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name: media base name + language + format."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
2913 def date_from_str(date_str
):
2915 Return a datetime object from a string in the format YYYYMMDD or
2916 (now|today)[+-][0-9](day|week|month|year)(s)?"""
2917 today
= datetime
.date
.today()
2918 if date_str
in ('now', 'today'):
2920 if date_str
== 'yesterday':
2921 return today
- datetime
.timedelta(days
=1)
2922 match
= re
.match(r
'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
)
2923 if match
is not None:
2924 sign
= match
.group('sign')
2925 time
= int(match
.group('time'))
2928 unit
= match
.group('unit')
2929 # A bad approximation?
2933 elif unit
== 'year':
2937 delta
= datetime
.timedelta(**{unit
: time
})
2938 return today
+ delta
2939 return datetime
.datetime
.strptime(date_str
, '%Y%m%d').date()
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that do not look like 'YYYYMMDD' are returned unchanged.
    """
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
2952 class DateRange(object):
2953 """Represents a time interval between two dates"""
2955 def __init__(self
, start
=None, end
=None):
2956 """start and end must be strings in the format accepted by date"""
2957 if start
is not None:
2958 self
.start
= date_from_str(start
)
2960 self
.start
= datetime
.datetime
.min.date()
2962 self
.end
= date_from_str(end
)
2964 self
.end
= datetime
.datetime
.max.date()
2965 if self
.start
> self
.end
:
2966 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
2970 """Returns a range that only contains the given day"""
2971 return cls(day
, day
)
2973 def __contains__(self
, date
):
2974 """Check if the date is in the range"""
2975 if not isinstance(date
, datetime
.date
):
2976 date
= date_from_str(date
)
2977 return self
.start
<= date
<= self
.end
2980 return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat())
2983 def platform_name():
2984 """ Returns the platform name as a compat_str """
2985 res
= platform
.platform()
2986 if isinstance(res
, bytes):
2987 res
= res
.decode(preferredencoding())
2989 assert isinstance(res
, compat_str
)
2993 def _windows_write_string(s
, out
):
2994 """ Returns True if the string was written using special methods,
2995 False if it has yet to be written out."""
2996 # Adapted from http://stackoverflow.com/a/3259271/35070
2999 import ctypes
.wintypes
3007 fileno
= out
.fileno()
3008 except AttributeError:
3009 # If the output stream doesn't have a fileno, it's virtual
3011 except io
.UnsupportedOperation
:
3012 # Some strange Windows pseudo files?
3014 if fileno
not in WIN_OUTPUT_IDS
:
3017 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3018 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3019 ('GetStdHandle', ctypes
.windll
.kernel32
))
3020 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3022 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3023 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3024 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3025 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3026 written
= ctypes
.wintypes
.DWORD(0)
3028 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3029 FILE_TYPE_CHAR
= 0x0002
3030 FILE_TYPE_REMOTE
= 0x8000
3031 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3032 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3033 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3034 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3035 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3037 def not_a_console(handle
):
3038 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3040 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3041 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3043 if not_a_console(h
):
3046 def next_nonbmp_pos(s
):
3048 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3049 except StopIteration:
3053 count
= min(next_nonbmp_pos(s
), 1024)
3055 ret
= WriteConsoleW(
3056 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3058 raise OSError('Failed to write string')
3059 if not count
: # We just wrote a non-BMP character
3060 assert written
.value
== 2
3063 assert written
.value
> 0
3064 s
= s
[written
.value
:]
3068 def write_string(s
, out
=None, encoding
=None):
3071 assert type(s
) == compat_str
3073 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3074 if _windows_write_string(s
, out
):
3077 if ('b' in getattr(out
, 'mode', '')
3078 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3079 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3081 elif hasattr(out
, 'buffer'):
3082 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3083 byt
= s
.encode(enc
, 'ignore')
3084 out
.buffer.write(byt
)
3090 def bytes_to_intlist(bs
):
3093 if isinstance(bs
[0], int): # Python 3
3096 return [ord(c
) for c
in bs
]
3099 def intlist_to_bytes(xs
):
3102 return compat_struct_pack('%dB' % len(xs
), *xs
)
3105 # Cross-platform file locking
3106 if sys
.platform
== 'win32':
3107 import ctypes
.wintypes
3110 class OVERLAPPED(ctypes
.Structure
):
3112 ('Internal', ctypes
.wintypes
.LPVOID
),
3113 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3114 ('Offset', ctypes
.wintypes
.DWORD
),
3115 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3116 ('hEvent', ctypes
.wintypes
.HANDLE
),
3119 kernel32
= ctypes
.windll
.kernel32
3120 LockFileEx
= kernel32
.LockFileEx
3121 LockFileEx
.argtypes
= [
3122 ctypes
.wintypes
.HANDLE
, # hFile
3123 ctypes
.wintypes
.DWORD
, # dwFlags
3124 ctypes
.wintypes
.DWORD
, # dwReserved
3125 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3126 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3127 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3129 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3130 UnlockFileEx
= kernel32
.UnlockFileEx
3131 UnlockFileEx
.argtypes
= [
3132 ctypes
.wintypes
.HANDLE
, # hFile
3133 ctypes
.wintypes
.DWORD
, # dwReserved
3134 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3135 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3136 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3138 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3139 whole_low
= 0xffffffff
3140 whole_high
= 0x7fffffff
3142 def _lock_file(f
, exclusive
):
3143 overlapped
= OVERLAPPED()
3144 overlapped
.Offset
= 0
3145 overlapped
.OffsetHigh
= 0
3146 overlapped
.hEvent
= 0
3147 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3148 handle
= msvcrt
.get_osfhandle(f
.fileno())
3149 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3150 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3151 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3153 def _unlock_file(f
):
3154 assert f
._lock
_file
_overlapped
_p
3155 handle
= msvcrt
.get_osfhandle(f
.fileno())
3156 if not UnlockFileEx(handle
, 0,
3157 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3158 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3161 # Some platforms, such as Jython, is missing fcntl
3165 def _lock_file(f
, exclusive
):
3166 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3168 def _unlock_file(f
):
3169 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3171 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3173 def _lock_file(f
, exclusive
):
3174 raise IOError(UNSUPPORTED_MSG
)
3176 def _unlock_file(f
):
3177 raise IOError(UNSUPPORTED_MSG
)
3180 class locked_file(object):
3181 def __init__(self
, filename
, mode
, encoding
=None):
3182 assert mode
in ['r', 'a', 'w']
3183 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3186 def __enter__(self
):
3187 exclusive
= self
.mode
!= 'r'
3189 _lock_file(self
.f
, exclusive
)
3195 def __exit__(self
, etype
, value
, traceback
):
3197 _unlock_file(self
.f
)
3204 def write(self
, *args
):
3205 return self
.f
.write(*args
)
3207 def read(self
, *args
):
3208 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to 'utf-8' when the
    interpreter reports none."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        enc = 'utf-8'
    return enc
def shell_quote(args):
    """Return args joined into a single shell-escaped command-line string."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(a)) for a in args)
3227 def smuggle_url(url
, data
):
3228 """ Pass additional data in a URL for internal use. """
3230 url
, idata
= unsmuggle_url(url
, {})
3232 sdata
= compat_urllib_parse_urlencode(
3233 {'__youtubedl_smuggle': json
.dumps(data
)})
3234 return url
+ '#' + sdata
3237 def unsmuggle_url(smug_url
, default
=None):
3238 if '#__youtubedl_smuggle' not in smug_url
:
3239 return smug_url
, default
3240 url
, _
, sdata
= smug_url
.rpartition('#')
3241 jsond
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0]
3242 data
= json
.loads(jsond
)
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00KiB'.

    Returns 'N/A' when the value is None; string inputs are coerced
    to float first.
    """
    if bytes is None:
        return 'N/A'
    # isinstance instead of `type(bytes) is str` so str subclasses work too
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number> <unit>' expression in s using unit_table
    (unit -> multiplier) and return the integer value, or None when s does
    not start with such an expression."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # Accept a comma as the decimal separator as well
    number = float(m.group('num').replace(',', '.'))
    multiplier = unit_table[m.group('unit')]
    return int(number * multiplier)
3271 def parse_filesize(s
):
3275 # The lower-case forms are of course incorrect and unofficial,
3276 # but we support those too
3293 'megabytes': 1000 ** 2,
3294 'mebibytes': 1024 ** 2,
3300 'gigabytes': 1000 ** 3,
3301 'gibibytes': 1024 ** 3,
3307 'terabytes': 1000 ** 4,
3308 'tebibytes': 1024 ** 4,
3314 'petabytes': 1000 ** 5,
3315 'pebibytes': 1024 ** 5,
3321 'exabytes': 1000 ** 6,
3322 'exbibytes': 1024 ** 6,
3328 'zettabytes': 1000 ** 7,
3329 'zebibytes': 1024 ** 7,
3335 'yottabytes': 1000 ** 8,
3336 'yobibytes': 1024 ** 8,
3339 return lookup_unit_table(_UNIT_TABLE
, s
)
3348 if re
.match(r
'^[\d,.]+$', s
):
3349 return str_to_int(s
)
3360 return lookup_unit_table(_UNIT_TABLE
, s
)
3363 def parse_resolution(s
):
3367 mobj
= re
.search(r
'\b(?P<w>\d+)\s*[xXĆ]\s*(?P<h>\d+)\b', s
)
3370 'width': int(mobj
.group('w')),
3371 'height': int(mobj
.group('h')),
3374 mobj
= re
.search(r
'\b(\d+)[pPiI]\b', s
)
3376 return {'height': int(mobj
.group(1))}
3378 mobj
= re
.search(r
'\b([48])[kK]\b', s
)
3380 return {'height': int(mobj
.group(1)) * 540}
3385 def parse_bitrate(s
):
3386 if not isinstance(s
, compat_str
):
3388 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3390 return int(mobj
.group(1))
3393 def month_by_name(name
, lang
='en'):
3394 """ Return the number of a month by (locale-independently) English name """
3396 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
3399 return month_names
.index(name
) + 1
def month_by_abbreviation(abbrev):
    """Return the 1-based month number for an English three-letter month
    abbreviation, or None when it is not recognized."""
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3414 def fix_xml_ampersands(xml_str
):
3415 """Replace all the '&' by '&' in XML"""
3417 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3422 def setproctitle(title
):
3423 assert isinstance(title
, compat_str
)
3425 # ctypes in Jython is not complete
3426 # http://bugs.jython.org/issue2148
3427 if sys
.platform
.startswith('java'):
3431 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3435 # LoadLibrary in Windows Python 2.7.13 only expects
3436 # a bytestring, but since unicode_literals turns
3437 # every string into a unicode string, it fails.
3439 title_bytes
= title
.encode('utf-8')
3440 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3441 buf
.value
= title_bytes
3443 libc
.prctl(15, buf
, 0, 0, 0)
3444 except AttributeError:
3445 return # Strange libc, just skip this
def remove_start(s, start):
    """Strip a leading prefix from s when present; None passes through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip a trailing suffix from s when present; None passes through."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Drop one matching pair of surrounding quotes (single or double)."""
    if s is None or len(s) < 2:
        return s
    for q in ('"', "'", ):
        if s[0] == q and s[-1] == q:
            return s[1:-1]
    return s
def url_basename(url):
    """Return the last non-empty path segment of url."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip('/').split('/')
    return segments[-1]
3471 return re
.match(r
'https?://[^?#&]+/', url
).group()
3474 def urljoin(base
, path
):
3475 if isinstance(path
, bytes):
3476 path
= path
.decode('utf-8')
3477 if not isinstance(path
, compat_str
) or not path
:
3479 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
3481 if isinstance(base
, bytes):
3482 base
= base
.decode('utf-8')
3483 if not isinstance(base
, compat_str
) or not re
.match(
3484 r
'^(?:https?:)?//', base
):
3486 return compat_urlparse
.urljoin(base
, path
)
3489 class HEADRequest(compat_urllib_request
.Request
):
3490 def get_method(self
):
3494 class PUTRequest(compat_urllib_request
.Request
):
3495 def get_method(self
):
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce v (optionally an attribute of v) to int, scaled by
    invscale/scale; return default when coercion is impossible."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3513 def str_or_none(v
, default
=None):
3514 return default
if v
is None else compat_str(v
)
def str_to_int(int_str):
    """A more relaxed version of int_or_none: strips commas, dots and
    plus signs before parsing; None passes through."""
    if int_str is None:
        return None
    return int(re.sub(r'[,\.\+]', '', int_str))
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce v to float scaled by invscale/scale; return default when v
    is None or not convertible."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Return v only when it is a genuine bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3538 def strip_or_none(v
, default
=None):
3539 return v
.strip() if isinstance(v
, compat_str
) else default
3542 def url_or_none(url
):
3543 if not url
or not isinstance(url
, compat_str
):
3546 return url
if re
.match(r
'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url
) else None
3549 def parse_duration(s
):
3550 if not isinstance(s
, compat_basestring
):
3555 days
, hours
, mins
, secs
, ms
= [None] * 5
3556 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3558 days
, hours
, mins
, secs
, ms
= m
.groups()
3563 [0-9]+\s*y(?:ears?)?\s*
3566 [0-9]+\s*m(?:onths?)?\s*
3569 [0-9]+\s*w(?:eeks?)?\s*
3572 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3576 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3579 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3582 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3585 days
, hours
, mins
, secs
, ms
= m
.groups()
3587 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3589 hours
, mins
= m
.groups()
3595 duration
+= float(secs
)
3597 duration
+= float(mins
) * 60
3599 duration
+= float(hours
) * 60 * 60
3601 duration
+= float(days
) * 24 * 60 * 60
3603 duration
+= float(ms
)
3607 def prepend_extension(filename
, ext
, expected_real_ext
=None):
3608 name
, real_ext
= os
.path
.splitext(filename
)
3610 '{0}.{1}{2}'.format(name
, ext
, real_ext
)
3611 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
3612 else '{0}.{1}'.format(filename
, ext
))
3615 def replace_extension(filename
, ext
, expected_real_ext
=None):
3616 name
, real_ext
= os
.path
.splitext(filename
)
3617 return '{0}.{1}'.format(
3618 name
if not expected_real_ext
or real_ext
[1:] == expected_real_ext
else filename
,
3622 def check_executable(exe
, args
=[]):
3623 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3624 args can be a list of arguments for a short output (like -version) """
3626 subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate()
3632 def get_exe_version(exe
, args
=['--version'],
3633 version_re
=None, unrecognized
='present'):
3634 """ Returns the version of the specified executable,
3635 or False if the executable is not present """
3637 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3638 # SIGTTOU if youtube-dl is run in the background.
3639 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3640 out
, _
= subprocess
.Popen(
3641 [encodeArgument(exe
)] + args
,
3642 stdin
=subprocess
.PIPE
,
3643 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate()
3646 if isinstance(out
, bytes): # Python 2.x
3647 out
= out
.decode('ascii', 'ignore')
3648 return detect_exe_version(out
, version_re
, unrecognized
)
3651 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3652 assert isinstance(output
, compat_str
)
3653 if version_re
is None:
3654 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3655 m
= re
.search(version_re
, output
)
3662 class PagedList(object):
3664 # This is only useful for tests
3665 return len(self
.getslice())
3668 class OnDemandPagedList(PagedList
):
3669 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
3670 self
._pagefunc
= pagefunc
3671 self
._pagesize
= pagesize
3672 self
._use
_cache
= use_cache
3676 def getslice(self
, start
=0, end
=None):
3678 for pagenum
in itertools
.count(start
// self
._pagesize
):
3679 firstid
= pagenum
* self
._pagesize
3680 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
3681 if start
>= nextfirstid
:
3686 page_results
= self
._cache
.get(pagenum
)
3687 if page_results
is None:
3688 page_results
= list(self
._pagefunc
(pagenum
))
3690 self
._cache
[pagenum
] = page_results
3693 start
% self
._pagesize
3694 if firstid
<= start
< nextfirstid
3698 ((end
- 1) % self
._pagesize
) + 1
3699 if (end
is not None and firstid
<= end
<= nextfirstid
)
3702 if startv
!= 0 or endv
is not None:
3703 page_results
= page_results
[startv
:endv
]
3704 res
.extend(page_results
)
3706 # A little optimization - if current page is not "full", ie. does
3707 # not contain page_size videos then we can assume that this page
3708 # is the last one - there are no more ids on further pages -
3709 # i.e. no need to query again.
3710 if len(page_results
) + startv
< self
._pagesize
:
3713 # If we got the whole page, but the next page is not interesting,
3714 # break out early as well
3715 if end
== nextfirstid
:
3720 class InAdvancePagedList(PagedList
):
3721 def __init__(self
, pagefunc
, pagecount
, pagesize
):
3722 self
._pagefunc
= pagefunc
3723 self
._pagecount
= pagecount
3724 self
._pagesize
= pagesize
3726 def getslice(self
, start
=0, end
=None):
3728 start_page
= start
// self
._pagesize
3730 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
3731 skip_elems
= start
- start_page
* self
._pagesize
3732 only_more
= None if end
is None else end
- start
3733 for pagenum
in range(start_page
, end_page
):
3734 page
= list(self
._pagefunc
(pagenum
))
3736 page
= page
[skip_elems
:]
3738 if only_more
is not None:
3739 if len(page
) < only_more
:
3740 only_more
-= len(page
)
3742 page
= page
[:only_more
]
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
3765 def escape_rfc3986(s
):
3766 """Escape non-ASCII characters as suggested by RFC 3986"""
3767 if sys
.version_info
< (3, 0) and isinstance(s
, compat_str
):
3768 s
= s
.encode('utf-8')
3769 return compat_urllib_parse
.quote(s
, b
"%/;:@&=+$,!~*'()?#[]")
3772 def escape_url(url
):
3773 """Escape URL as suggested by RFC 3986"""
3774 url_parsed
= compat_urllib_parse_urlparse(url
)
3775 return url_parsed
._replace
(
3776 netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'),
3777 path
=escape_rfc3986(url_parsed
.path
),
3778 params
=escape_rfc3986(url_parsed
.params
),
3779 query
=escape_rfc3986(url_parsed
.query
),
3780 fragment
=escape_rfc3986(url_parsed
.fragment
)
3784 def read_batch_urls(batch_fd
):
3786 if not isinstance(url
, compat_str
):
3787 url
= url
.decode('utf-8', 'replace')
3788 BOM_UTF8
= '\xef\xbb\xbf'
3789 if url
.startswith(BOM_UTF8
):
3790 url
= url
[len(BOM_UTF8
):]
3792 if url
.startswith(('#', ';', ']')):
3796 with contextlib
.closing(batch_fd
) as fd
:
3797 return [url
for url
in map(fixup
, fd
) if url
]
3800 def urlencode_postdata(*args
, **kargs
):
3801 return compat_urllib_parse_urlencode(*args
, **kargs
).encode('ascii')
3804 def update_url_query(url
, query
):
3807 parsed_url
= compat_urlparse
.urlparse(url
)
3808 qs
= compat_parse_qs(parsed_url
.query
)
3810 return compat_urlparse
.urlunparse(parsed_url
._replace
(
3811 query
=compat_urllib_parse_urlencode(qs
, True)))
3814 def update_Request(req
, url
=None, data
=None, headers
={}, query
={}):
3815 req_headers
= req
.headers
.copy()
3816 req_headers
.update(headers
)
3817 req_data
= data
or req
.data
3818 req_url
= update_url_query(url
or req
.get_full_url(), query
)
3819 req_get_method
= req
.get_method()
3820 if req_get_method
== 'HEAD':
3821 req_type
= HEADRequest
3822 elif req_get_method
== 'PUT':
3823 req_type
= PUTRequest
3825 req_type
= compat_urllib_request
.Request
3827 req_url
, data
=req_data
, headers
=req_headers
,
3828 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
3829 if hasattr(req
, 'timeout'):
3830 new_req
.timeout
= req
.timeout
3834 def _multipart_encode_impl(data
, boundary
):
3835 content_type
= 'multipart/form-data; boundary=%s' % boundary
3838 for k
, v
in data
.items():
3839 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
3840 if isinstance(k
, compat_str
):
3841 k
= k
.encode('utf-8')
3842 if isinstance(v
, compat_str
):
3843 v
= v
.encode('utf-8')
3844 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3845 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3846 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
3847 if boundary
.encode('ascii') in content
:
3848 raise ValueError('Boundary overlaps with data')
3851 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
3853 return out
, content_type
3856 def multipart_encode(data
, boundary
=None):
3858 Encode a dict to RFC 7578-compliant form-data
3861 A dict where keys and values can be either Unicode or bytes-like
3864 If specified a Unicode object, it's used as the boundary. Otherwise
3865 a random boundary is generated.
3867 Reference: https://tools.ietf.org/html/rfc7578
3869 has_specified_boundary
= boundary
is not None
3872 if boundary
is None:
3873 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
3876 out
, content_type
= _multipart_encode_impl(data
, boundary
)
3879 if has_specified_boundary
:
3883 return out
, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up a key, or the first usable key of several, in d.

    When key_or_keys is a list/tuple, a candidate is skipped if missing,
    None, or (with skip_false_values) falsy; default is returned when no
    candidate qualifies.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None or skip_false_values and not d[key]:
            continue
        return d[key]
    return default
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to src and return the first result that
    neither raises a common access error nor fails the optional type check;
    return None otherwise."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(v, expected_type):
                return v
3909 def merge_dicts(*dicts
):
3911 for a_dict
in dicts
:
3912 for k
, v
in a_dict
.items():
3916 or (isinstance(v
, compat_str
) and v
3917 and isinstance(merged
[k
], compat_str
)
3918 and not merged
[k
])):
3923 def encode_compat_str(string
, encoding
=preferredencoding(), errors
='strict'):
3924 return string
if isinstance(string
, compat_str
) else compat_str(string
, encoding
, errors
)
3936 TV_PARENTAL_GUIDELINES
= {
3946 def parse_age_limit(s
):
3948 return s
if 0 <= s
<= 21 else None
3949 if not isinstance(s
, compat_basestring
):
3951 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
3953 return int(m
.group('age'))
3955 return US_RATINGS
[s
]
3956 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
3958 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving only the JSON payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
3972 def js_to_json(code
):
3973 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
3974 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
3976 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
3977 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
3982 if v
in ('true', 'false', 'null'):
3984 elif v
.startswith('/*') or v
.startswith('//') or v
== ',':
3987 if v
[0] in ("'", '"'):
3988 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
3993 }.get(m
.group(0), m
.group(0)), v
[1:-1])
3995 for regex
, base
in INTEGER_TABLE
:
3996 im
= re
.match(regex
, v
)
3998 i
= int(im
.group(1), base
)
3999 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4003 return re
.sub(r
'''(?sx)
4004 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4005 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4006 {comment}|,(?={skip}[\]}}])|
4007 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4008 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4010 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
def qualities(quality_ids):
    """Get a numeric quality value out of a list of possible values."""
    def q(qid):
        # Unknown qualities rank below every known one.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4023 DEFAULT_OUTTMPL
= '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """Truncate overly long strings, appending an ellipsis; None passes
    through."""
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
def version_tuple(v):
    """Split a dot/dash-separated version string into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
def is_outdated_version(version, limit, assume_new=True):
    """Check whether version is older than limit.

    Empty or unparsable input yields the pessimistic answer implied by
    assume_new.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
def ytdl_is_updateable():
    """Return True when youtube-dl runs from a zip bundle or a frozen
    binary, i.e. can be updated in place with -U."""
    from zipimport import zipimporter
    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = (compat_shlex_quote(arg) for arg in args)
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Return a text (unicode) representation of an exception, py2-safe."""
    text = str(err)
    if sys.version_info[0] >= 3:
        return text
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return text.decode(preferredencoding())
def mimetype2ext(mt):
    """Map a MIME type string to a file extension (or None for None input)."""
    if mt is None:
        return None

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    # fall back to the subtype only, with any parameters stripped
    _, _, res = mt.rpartition('/')
    res = res.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)  # unknown subtypes pass through unchanged
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs attribute (e.g. "avc1.64001f, mp4a.40.2")
    into a {'vcodec': ..., 'acodec': ...} dict; 'none' marks a missing track.

    Returns {} when nothing could be determined.
    See http://tools.ietf.org/html/rfc6381
    """
    if not codecs_str:
        return {}
    # NOTE: the lambda parameter was previously named `str`, shadowing the
    # builtin; also fixed the misspelled local `splited_codecs`.
    split_codecs = list(filter(None, map(
        lambda s: s.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # the leading dotted component identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # nothing recognized: with exactly two entries assume video+audio order
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a urllib response handle: the
    Content-Disposition filename first, then the Content-Type MIME type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI with a base64-encoded payload."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:{0};base64,{1}'.format(mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # no user limit set, or content available for everyone
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Order matters: utf-32 BOMs are prefixes of utf-16 BOMs.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
def determine_protocol(info_dict):
    """Return the download protocol for an info dict: an explicit 'protocol'
    key wins, then URL-prefix heuristics, then extension, then URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # widest cell per column decides that column's width
    widths = []
    for column in zip(*rows):
        widths.append(max(len(compat_str(cell)) for cell in column))
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
def _match_one(filter_part, dct):
    """Evaluate one filter expression (e.g. "height >= 720", "uploader = foo",
    "!is_live") against dict `dct`. Raises ValueError on malformed input."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # unescape the quote character inside quoted values
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # not a plain integer: try a filesize suffix (e.g. 500k, 1.2MiB)
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # missing key only passes when the '?' (none-inclusive) marker was used
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
def match_filter_func(filter_str):
    """Build a match-filter callback: returns None when the video passes,
    otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds (float), or None."""
    if not time_expr:
        return None

    offset_m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_m:
        return float(offset_m.group('time_offset'))

    clock_m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_m:
        hours, minutes, secs = clock_m.groups()
        # frame-style 'SS:FF' seconds are treated as a decimal fraction
        return 3600 * int(hours) + 60 * int(minutes) + float(secs.replace(':', '.'))
    return None
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Fold legacy TTAF namespaces into the current TTML ones before parsing.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Styling attributes translated into SRT font/b/i/u markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # Streaming XML target that renders one <p> element to SRT markup.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # skip properties already applied by an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and feed it through the streaming parser above.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance iteratively until every parent is known.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on <body>/<div> becomes the document-wide default style.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param):
    """Render an option with a value as CLI args; [] when the param is unset."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as CLI args; [] when the param is unset."""
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, param, default=None):
    """Fetch an "extra CLI args" list from params.

    Returns the stored list, or `default` (an empty list when not given).
    The previous signature used a mutable default argument (`default=[]`),
    which callers could accidentally share and mutate across calls; the
    `None` sentinel returns a fresh list instead while keeping the same
    observable result for existing callers.

    Raises AssertionError if the stored value is not a list.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
4514 class ISO639Utils(object):
4515 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4574 'iw': 'heb', # Replaced by he in 1989 revision
4584 'in': 'ind', # Replaced by id in 1989 revision
4699 'ji': 'yid', # Replaced by yi in 1989 revision
    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters participate in the lookup, so longer
        # inputs (e.g. 'en-US') still resolve by their primary subtag.
        return cls._lang_map.get(code[:2])
    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the map; returns None implicitly when not found.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4719 class ISO3166Utils(object):
4720 # From http://data.okfn.org/data/core/country-list
4722 'AF': 'Afghanistan',
4723 'AX': 'Ć
land Islands',
4726 'AS': 'American Samoa',
4731 'AG': 'Antigua and Barbuda',
4748 'BO': 'Bolivia, Plurinational State of',
4749 'BQ': 'Bonaire, Sint Eustatius and Saba',
4750 'BA': 'Bosnia and Herzegovina',
4752 'BV': 'Bouvet Island',
4754 'IO': 'British Indian Ocean Territory',
4755 'BN': 'Brunei Darussalam',
4757 'BF': 'Burkina Faso',
4763 'KY': 'Cayman Islands',
4764 'CF': 'Central African Republic',
4768 'CX': 'Christmas Island',
4769 'CC': 'Cocos (Keeling) Islands',
4773 'CD': 'Congo, the Democratic Republic of the',
4774 'CK': 'Cook Islands',
4776 'CI': 'CĆ“te d\'Ivoire',
4781 'CZ': 'Czech Republic',
4785 'DO': 'Dominican Republic',
4788 'SV': 'El Salvador',
4789 'GQ': 'Equatorial Guinea',
4793 'FK': 'Falkland Islands (Malvinas)',
4794 'FO': 'Faroe Islands',
4798 'GF': 'French Guiana',
4799 'PF': 'French Polynesia',
4800 'TF': 'French Southern Territories',
4815 'GW': 'Guinea-Bissau',
4818 'HM': 'Heard Island and McDonald Islands',
4819 'VA': 'Holy See (Vatican City State)',
4826 'IR': 'Iran, Islamic Republic of',
4829 'IM': 'Isle of Man',
4839 'KP': 'Korea, Democratic People\'s Republic of',
4840 'KR': 'Korea, Republic of',
4843 'LA': 'Lao People\'s Democratic Republic',
4849 'LI': 'Liechtenstein',
4853 'MK': 'Macedonia, the Former Yugoslav Republic of',
4860 'MH': 'Marshall Islands',
4866 'FM': 'Micronesia, Federated States of',
4867 'MD': 'Moldova, Republic of',
4878 'NL': 'Netherlands',
4879 'NC': 'New Caledonia',
4880 'NZ': 'New Zealand',
4885 'NF': 'Norfolk Island',
4886 'MP': 'Northern Mariana Islands',
4891 'PS': 'Palestine, State of',
4893 'PG': 'Papua New Guinea',
4896 'PH': 'Philippines',
4900 'PR': 'Puerto Rico',
4904 'RU': 'Russian Federation',
4906 'BL': 'Saint BarthƩlemy',
4907 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4908 'KN': 'Saint Kitts and Nevis',
4909 'LC': 'Saint Lucia',
4910 'MF': 'Saint Martin (French part)',
4911 'PM': 'Saint Pierre and Miquelon',
4912 'VC': 'Saint Vincent and the Grenadines',
4915 'ST': 'Sao Tome and Principe',
4916 'SA': 'Saudi Arabia',
4920 'SL': 'Sierra Leone',
4922 'SX': 'Sint Maarten (Dutch part)',
4925 'SB': 'Solomon Islands',
4927 'ZA': 'South Africa',
4928 'GS': 'South Georgia and the South Sandwich Islands',
4929 'SS': 'South Sudan',
4934 'SJ': 'Svalbard and Jan Mayen',
4937 'CH': 'Switzerland',
4938 'SY': 'Syrian Arab Republic',
4939 'TW': 'Taiwan, Province of China',
4941 'TZ': 'Tanzania, United Republic of',
4943 'TL': 'Timor-Leste',
4947 'TT': 'Trinidad and Tobago',
4950 'TM': 'Turkmenistan',
4951 'TC': 'Turks and Caicos Islands',
4955 'AE': 'United Arab Emirates',
4956 'GB': 'United Kingdom',
4957 'US': 'United States',
4958 'UM': 'United States Minor Outlying Islands',
4962 'VE': 'Venezuela, Bolivarian Republic of',
4964 'VG': 'Virgin Islands, British',
4965 'VI': 'Virgin Islands, U.S.',
4966 'WF': 'Wallis and Futuna',
4967 'EH': 'Western Sahara',
    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive: map keys are upper-case two-letter codes.
        return cls._country_map.get(code.upper())
4979 class GeoUtils(object):
4980 # Major IPv4 address blocks per country
4982 'AD': '85.94.160.0/19',
4983 'AE': '94.200.0.0/13',
4984 'AF': '149.54.0.0/17',
4985 'AG': '209.59.64.0/18',
4986 'AI': '204.14.248.0/21',
4987 'AL': '46.99.0.0/16',
4988 'AM': '46.70.0.0/15',
4989 'AO': '105.168.0.0/13',
4990 'AP': '159.117.192.0/21',
4991 'AR': '181.0.0.0/12',
4992 'AS': '202.70.112.0/20',
4993 'AT': '84.112.0.0/13',
4994 'AU': '1.128.0.0/11',
4995 'AW': '181.41.0.0/18',
4996 'AZ': '5.191.0.0/16',
4997 'BA': '31.176.128.0/17',
4998 'BB': '65.48.128.0/17',
4999 'BD': '114.130.0.0/16',
5001 'BF': '129.45.128.0/17',
5002 'BG': '95.42.0.0/15',
5003 'BH': '37.131.0.0/17',
5004 'BI': '154.117.192.0/18',
5005 'BJ': '137.255.0.0/16',
5006 'BL': '192.131.134.0/24',
5007 'BM': '196.12.64.0/18',
5008 'BN': '156.31.0.0/16',
5009 'BO': '161.56.0.0/16',
5010 'BQ': '161.0.80.0/20',
5011 'BR': '152.240.0.0/12',
5012 'BS': '24.51.64.0/18',
5013 'BT': '119.2.96.0/19',
5014 'BW': '168.167.0.0/16',
5015 'BY': '178.120.0.0/13',
5016 'BZ': '179.42.192.0/18',
5017 'CA': '99.224.0.0/11',
5018 'CD': '41.243.0.0/16',
5019 'CF': '196.32.200.0/21',
5020 'CG': '197.214.128.0/17',
5021 'CH': '85.0.0.0/13',
5022 'CI': '154.232.0.0/14',
5023 'CK': '202.65.32.0/19',
5024 'CL': '152.172.0.0/14',
5025 'CM': '165.210.0.0/15',
5026 'CN': '36.128.0.0/10',
5027 'CO': '181.240.0.0/12',
5028 'CR': '201.192.0.0/12',
5029 'CU': '152.206.0.0/15',
5030 'CV': '165.90.96.0/19',
5031 'CW': '190.88.128.0/17',
5032 'CY': '46.198.0.0/15',
5033 'CZ': '88.100.0.0/14',
5035 'DJ': '197.241.0.0/17',
5036 'DK': '87.48.0.0/12',
5037 'DM': '192.243.48.0/20',
5038 'DO': '152.166.0.0/15',
5039 'DZ': '41.96.0.0/12',
5040 'EC': '186.68.0.0/15',
5041 'EE': '90.190.0.0/15',
5042 'EG': '156.160.0.0/11',
5043 'ER': '196.200.96.0/20',
5044 'ES': '88.0.0.0/11',
5045 'ET': '196.188.0.0/14',
5046 'EU': '2.16.0.0/13',
5047 'FI': '91.152.0.0/13',
5048 'FJ': '144.120.0.0/16',
5049 'FM': '119.252.112.0/20',
5050 'FO': '88.85.32.0/19',
5052 'GA': '41.158.0.0/15',
5054 'GD': '74.122.88.0/21',
5055 'GE': '31.146.0.0/16',
5056 'GF': '161.22.64.0/18',
5057 'GG': '62.68.160.0/19',
5058 'GH': '45.208.0.0/14',
5059 'GI': '85.115.128.0/19',
5060 'GL': '88.83.0.0/19',
5061 'GM': '160.182.0.0/15',
5062 'GN': '197.149.192.0/18',
5063 'GP': '104.250.0.0/19',
5064 'GQ': '105.235.224.0/20',
5065 'GR': '94.64.0.0/13',
5066 'GT': '168.234.0.0/16',
5067 'GU': '168.123.0.0/16',
5068 'GW': '197.214.80.0/20',
5069 'GY': '181.41.64.0/18',
5070 'HK': '113.252.0.0/14',
5071 'HN': '181.210.0.0/16',
5072 'HR': '93.136.0.0/13',
5073 'HT': '148.102.128.0/17',
5074 'HU': '84.0.0.0/14',
5075 'ID': '39.192.0.0/10',
5076 'IE': '87.32.0.0/12',
5077 'IL': '79.176.0.0/13',
5078 'IM': '5.62.80.0/20',
5079 'IN': '117.192.0.0/10',
5080 'IO': '203.83.48.0/21',
5081 'IQ': '37.236.0.0/14',
5082 'IR': '2.176.0.0/12',
5083 'IS': '82.221.0.0/16',
5084 'IT': '79.0.0.0/10',
5085 'JE': '87.244.64.0/18',
5086 'JM': '72.27.0.0/17',
5087 'JO': '176.29.0.0/16',
5088 'JP': '126.0.0.0/8',
5089 'KE': '105.48.0.0/12',
5090 'KG': '158.181.128.0/17',
5091 'KH': '36.37.128.0/17',
5092 'KI': '103.25.140.0/22',
5093 'KM': '197.255.224.0/20',
5094 'KN': '198.32.32.0/19',
5095 'KP': '175.45.176.0/22',
5096 'KR': '175.192.0.0/10',
5097 'KW': '37.36.0.0/14',
5098 'KY': '64.96.0.0/15',
5099 'KZ': '2.72.0.0/13',
5100 'LA': '115.84.64.0/18',
5101 'LB': '178.135.0.0/16',
5102 'LC': '192.147.231.0/24',
5103 'LI': '82.117.0.0/19',
5104 'LK': '112.134.0.0/15',
5105 'LR': '41.86.0.0/19',
5106 'LS': '129.232.0.0/17',
5107 'LT': '78.56.0.0/13',
5108 'LU': '188.42.0.0/16',
5109 'LV': '46.109.0.0/16',
5110 'LY': '41.252.0.0/14',
5111 'MA': '105.128.0.0/11',
5112 'MC': '88.209.64.0/18',
5113 'MD': '37.246.0.0/16',
5114 'ME': '178.175.0.0/17',
5115 'MF': '74.112.232.0/21',
5116 'MG': '154.126.0.0/17',
5117 'MH': '117.103.88.0/21',
5118 'MK': '77.28.0.0/15',
5119 'ML': '154.118.128.0/18',
5120 'MM': '37.111.0.0/17',
5121 'MN': '49.0.128.0/17',
5122 'MO': '60.246.0.0/16',
5123 'MP': '202.88.64.0/20',
5124 'MQ': '109.203.224.0/19',
5125 'MR': '41.188.64.0/18',
5126 'MS': '208.90.112.0/22',
5127 'MT': '46.11.0.0/16',
5128 'MU': '105.16.0.0/12',
5129 'MV': '27.114.128.0/18',
5130 'MW': '105.234.0.0/16',
5131 'MX': '187.192.0.0/11',
5132 'MY': '175.136.0.0/13',
5133 'MZ': '197.218.0.0/15',
5134 'NA': '41.182.0.0/16',
5135 'NC': '101.101.0.0/18',
5136 'NE': '197.214.0.0/18',
5137 'NF': '203.17.240.0/22',
5138 'NG': '105.112.0.0/12',
5139 'NI': '186.76.0.0/15',
5140 'NL': '145.96.0.0/11',
5141 'NO': '84.208.0.0/13',
5142 'NP': '36.252.0.0/15',
5143 'NR': '203.98.224.0/19',
5144 'NU': '49.156.48.0/22',
5145 'NZ': '49.224.0.0/14',
5146 'OM': '5.36.0.0/15',
5147 'PA': '186.72.0.0/15',
5148 'PE': '186.160.0.0/14',
5149 'PF': '123.50.64.0/18',
5150 'PG': '124.240.192.0/19',
5151 'PH': '49.144.0.0/13',
5152 'PK': '39.32.0.0/11',
5153 'PL': '83.0.0.0/11',
5154 'PM': '70.36.0.0/20',
5155 'PR': '66.50.0.0/16',
5156 'PS': '188.161.0.0/16',
5157 'PT': '85.240.0.0/13',
5158 'PW': '202.124.224.0/20',
5159 'PY': '181.120.0.0/14',
5160 'QA': '37.210.0.0/15',
5161 'RE': '139.26.0.0/16',
5162 'RO': '79.112.0.0/13',
5163 'RS': '178.220.0.0/14',
5164 'RU': '5.136.0.0/13',
5165 'RW': '105.178.0.0/15',
5166 'SA': '188.48.0.0/13',
5167 'SB': '202.1.160.0/19',
5168 'SC': '154.192.0.0/11',
5169 'SD': '154.96.0.0/13',
5170 'SE': '78.64.0.0/12',
5171 'SG': '152.56.0.0/14',
5172 'SI': '188.196.0.0/14',
5173 'SK': '78.98.0.0/15',
5174 'SL': '197.215.0.0/17',
5175 'SM': '89.186.32.0/19',
5176 'SN': '41.82.0.0/15',
5177 'SO': '197.220.64.0/19',
5178 'SR': '186.179.128.0/17',
5179 'SS': '105.235.208.0/21',
5180 'ST': '197.159.160.0/19',
5181 'SV': '168.243.0.0/16',
5182 'SX': '190.102.0.0/20',
5184 'SZ': '41.84.224.0/19',
5185 'TC': '65.255.48.0/20',
5186 'TD': '154.68.128.0/19',
5187 'TG': '196.168.0.0/14',
5188 'TH': '171.96.0.0/13',
5189 'TJ': '85.9.128.0/18',
5190 'TK': '27.96.24.0/21',
5191 'TL': '180.189.160.0/20',
5192 'TM': '95.85.96.0/19',
5193 'TN': '197.0.0.0/11',
5194 'TO': '175.176.144.0/21',
5195 'TR': '78.160.0.0/11',
5196 'TT': '186.44.0.0/15',
5197 'TV': '202.2.96.0/19',
5198 'TW': '120.96.0.0/11',
5199 'TZ': '156.156.0.0/14',
5200 'UA': '93.72.0.0/13',
5201 'UG': '154.224.0.0/13',
5203 'UY': '167.56.0.0/13',
5204 'UZ': '82.215.64.0/18',
5205 'VA': '212.77.0.0/19',
5206 'VC': '24.92.144.0/20',
5207 'VE': '186.88.0.0/13',
5208 'VG': '172.103.64.0/18',
5209 'VI': '146.226.0.0/16',
5210 'VN': '14.160.0.0/11',
5211 'VU': '202.80.32.0/20',
5212 'WF': '117.20.32.0/21',
5213 'WS': '202.4.32.0/19',
5214 'YE': '134.35.0.0/16',
5215 'YT': '41.242.116.0/22',
5216 'ZA': '41.0.0.0/11',
5217 'ZM': '165.56.0.0/13',
5218 'ZW': '41.85.192.0/19',
    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as text) inside a country's known
        block (2-letter code looked up in _country_ip_map) or an explicit
        CIDR block; None for an unknown country code."""
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # host bits may take any value within the block
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header,
    falling back to the handler-level proxies otherwise."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    # default arguments bind the current loop values so each
                    # scheme gets its own handler (late-binding closure trap)
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5261 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5262 # released into Public Domain
5263 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    while n > 0:
        # emit 32 bits at a time, most significant word first
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes. this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    if length % 4:
        # left-pad with zero bytes so the input splits into 32-bit words
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # little-endian interpretation: reverse before hex-decoding
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # PKCS#1 v1.5 (RFC 8017, section 7.2.1) requires the padding string PS to
    # consist of NON-ZERO octets: a zero byte would prematurely terminate the
    # padding during decryption. randint(1, 255) enforces that; the previous
    # randint(0, 254) could emit zero bytes (and never produced 255).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Encode a non-negative integer in base `n` using `table` as digits
    (defaults to 0-9a-zA-Z truncated to the base)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Decode P.A.C.K.E.R.-style obfuscated JavaScript back to source."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfucasted_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        # empty symbol slots map back to their own base-n representation
        symbol_table[base_n_count] = symbols[count] or base_n_count

    # substitute every word token with its entry from the symbol table
    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfucasted_code)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list into a dict; quoted values are unquoted."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator)."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a (truecolor, 8-bit) PNG into (width, height, pixels) where
    pixels is a list of rows of raw byte values.

    Raises IOError on an invalid PNG or missing image data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # image data may be split over several IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # 3 bytes per pixel (RGB), no alpha handling
    pixels = []

    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # each scanline is prefixed by one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # undo the per-scanline filter (PNG spec section 9)
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute `key` to bytes `value` on file `path`, using
    the best available backend (pyxattr/xattr modules, NTFS ADS on Windows,
    or the setfattr/xattr CLI tools).

    Raises XAttrMetadataError on backend failure, XAttrUnavailableError when
    no usable backend exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'xattr' module, "
                    "or the 'xattr' binary.")
5584 def random_birthday(year_field
, month_field
, day_field
):
5585 start_date
= datetime
.date(1950, 1, 1)
5586 end_date
= datetime
.date(1995, 12, 31)
5587 offset
= random
.randint(0, (end_date
- start_date
).days
)
5588 random_date
= start_date
+ datetime
.timedelta(offset
)
5590 year_field
: str(random_date
.year
),
5591 month_field
: str(random_date
.month
),
5592 day_field
: str(random_date
.day
),