]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
New upstream version 2020.09.14
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_unquote_plus,
64 compat_urllib_request,
65 compat_urlparse,
66 compat_xpath,
67 )
68
69 from .socks import (
70 ProxyType,
71 sockssocket,
72 )
73
74
def register_socks_protocols():
    """Make urlsplit()/urlparse() treat SOCKS URL schemes as having a netloc.

    In Python < 2.6.5, urlsplit() suffers from the bug tracked at
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly.  "Register" each SOCKS
    scheme there once, skipping any that are already present.
    """
    known_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in known_schemes:
            known_schemes.append(socks_scheme)
82
83
# Type object of a compiled regular expression, for isinstance() checks on
# arguments that may be either a pattern string or a precompiled regex.
# This is not clearly defined otherwise: there is no portable public name
# for it across the Python versions supported here, so derive it from an
# actual compiled pattern.
compiled_regex_type = type(re.compile(''))
86
87
def random_user_agent():
    """Return a randomized User-Agent string for Chrome on 64-bit Windows 10.

    A Chrome version is picked at random so that repeated runs do not
    present a single fixed browser fingerprint to servers.

    Returns:
        str: a full 'Mozilla/5.0 (...) Chrome/<version> Safari/537.36'
        User-Agent header value.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Real, shipped Chrome version numbers covering majors 68-76.  The
    # previous revision enumerated every dev/beta/stable build in that range
    # (~1600 string literals) and rebuilt that huge tuple on every call; a
    # representative sample of versions from each release branch preserves
    # the anti-fingerprinting effect while keeping the constant maintainable
    # and cheap to construct.  Every entry below appeared in the exhaustive
    # upstream list.
    _CHROME_VERSIONS = (
        '76.0.3780.3',
        '76.0.3780.2',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '75.0.3770.14',
        '75.0.3770.13',
        '74.0.3729.129',
        '74.0.3729.128',
        '74.0.3729.127',
        '74.0.3729.126',
        '73.0.3683.121',
        '73.0.3683.120',
        '73.0.3683.119',
        '73.0.3683.118',
        '72.0.3626.122',
        '72.0.3626.121',
        '72.0.3626.120',
        '72.0.3626.119',
        '71.0.3578.141',
        '71.0.3578.140',
        '71.0.3578.139',
        '71.0.3578.138',
        '70.0.3538.124',
        '70.0.3538.123',
        '70.0.3538.122',
        '70.0.3538.121',
        '69.0.3497.128',
        '69.0.3497.127',
        '69.0.3497.126',
        '69.0.3497.125',
        '68.0.3440.134',
        '68.0.3440.133',
        '68.0.3440.132',
        '68.0.3440.131',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
# Default HTTP headers added to every request (see YoutubeDLHandler.http_request
# below, which injects any of these that the request does not already set).
# The User-Agent is randomized once per process via random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings that callers can opt into.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1683
1684
# Unique sentinel distinguishing "caller supplied no default" from an explicit
# default of None (used by the xpath_* helpers below).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code; used when parsing localized dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'fƩvrier', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'aoƻt', 'septembre', 'octobre', 'novembre', 'dƩcembre'],
}

# Media file extensions recognized elsewhere in the code base.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1712
# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration (a multi-character
# string for ligatures such as AE/OE/ss); consumed by sanitize_filename().
ACCENT_CHARS = dict(zip('Ć‚ĆƒĆ„Ć€ĆĆ…Ć†Ć‡ĆˆĆ‰ĆŠĆ‹ĆŒĆĆŽĆĆĆ‘Ć’Ć“Ć”Ć•Ć–ÅĆ˜Å’Ć™ĆšĆ›ĆœÅ°ĆĆžĆŸĆ Ć”Ć¢Ć£Ć¤Ć„Ć¦Ć§ĆØĆ©ĆŖƫƬƭƮĆÆĆ°Ć±Ć²Ć³Ć“ĆµĆ¶Å‘ĆøÅ“Ć¹ĆŗĆ»Ć¼Å±Ć½Ć¾Ćæ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1717
# strptime()-style format candidates tried in order by the date parsing
# helpers elsewhere in this module.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Additional formats that interpret ambiguous numeric dates day-first
# (European convention).
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional formats that interpret ambiguous numeric dates month-first
# (US convention).
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of "P.A.C.K.E.R."-style packed JavaScript
# (presumably; the pattern matches }('...', N, N, '...'.split('|')).
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> block.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1778
1779
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Make sure the reported encoding is actually usable;
        # otherwise fall back to UTF-8 below.
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1793
1794
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # NOTE(review): both lambdas ignore their argument `f` and always
        # operate on the enclosing `fn` — harmless here since they are only
        # ever called with fn, but worth knowing.
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temporary file next to the target so os.rename() below is an
    # atomic same-filesystem rename.
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() both sets and returns the mask, so set-and-restore is
            # the only way to read it; then give the temp file the default
            # permissions a normal open() would have produced.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # On any failure, best-effort cleanup of the temp file, then re-raise.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1853
1854
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        predicate = '[@%s]' % key if val is None else "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual scan for ElementTree < 2.7) """
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1869
1870 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1871 # the namespace parameter
1872
1873
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps in *path* into '{uri}tag' form using the
    prefix-to-URI mapping *ns_map*."""
    expanded = []
    for step in path.split('/'):
        parts = step.split(':')
        if len(parts) == 1:
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1884
1885
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching *xpath* (a single expression or an
    iterable of candidate expressions); honour *default*/*fatal* on a miss."""
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        found = _search(xpath)
    else:
        for candidate in xpath:
            found = _search(candidate)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = xpath
        raise ExtractorError('Could not find XML element %s' % name)
    return None
1907
1908
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the .text of the element at *xpath*, applying the same
    *default*/*fatal* semantics as xpath_element()."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = xpath
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
1922
1923
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute *key* of the element matching xpath[@key], applying
    *default*/*fatal* semantics on a miss."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1935
1936
def get_element_by_id(id, html):
    """Return the inner content of the first tag in *html* whose id equals *id*."""
    return get_element_by_attribute('id', id, html)
1940
1941
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying *class_name*, or None."""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1946
1947
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose *attribute* matches *value*,
    or None when nothing matches."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1951
1952
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as one whole word anywhere inside the class attribute.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1958
1959
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    results = []
    for match in re.finditer(pattern, html):
        content = match.group('content')

        # Strip a stray pair of surrounding quotes, if present.
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1983
1984
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser that records the attributes of a single element."""

    def __init__(self):
        # Empty until a start tag is seen.
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Remember the attributes of the (last) start tag encountered.
        self.attrs = dict(attrs)
1993
1994
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML
        pass
    return attr_parser.attrs
2019
2020
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Turn markup line breaks (<br>, </p><p>) into real newlines
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Drop any remaining tags, then decode HTML entities
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2036
2037
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the binary buffer of stdout when available (Python 3)
            out = getattr(sys.stdout, 'buffer', sys.stdout)
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming — propagate them.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2068
2069
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2077
2078
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    # Handle timestamps: turn e.g. 12:34:56 into 12_34_56 before the
    # per-character pass (which would otherwise map ':' to ' -').
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2118
2119
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform != 'win32':
        # Nothing to sanitize on other platforms
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace win32-forbidden characters and trailing dots/spaces in each
    # component, leaving '.'/'..' untouched.
    sanitized_path = [
        part if part in ('.', '..') else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)
        for part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2136
2137
def sanitize_url(url):
    """Normalize a URL: give protocol-relative URLs an http: scheme and
    repair a couple of scheme typos observed in the wild."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for typo_re, replacement in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://')):
        fixed, num_subs = re.subn(typo_re, replacement, url)
        if num_subs:
            return fixed
    return url
2154
2155
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after passing the URL through sanitize_url()."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2158
2159
def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))
2163
2164
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Membership is checked against the output list itself (not a set) so
    # unhashable elements keep working, preserving first-seen order.
    unique = []
    for item in iterable:
        if item not in unique:
            unique.append(item)
    return unique
2172
2173
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Ɖric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2203
2204
def unescapeHTML(s):
    """Decode HTML entities in *s*; None passes through unchanged."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2212
2213
def get_subprocess_encoding():
    """Return the encoding used for subprocess arguments on this platform."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Fall back to UTF-8 when the filesystem encoding is unknown
    return sys.getfilesystemencoding() or 'utf-8'
2224
2225
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    win_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if win_unicode_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2248
2249
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename back to text on Python 2; no-op on Python 3
    and for values that are already text."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2259
2260
def encodeArgument(s):
    """Encode a subprocess argument; accepts str (preferred) or legacy bytes."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2268
2269
def decodeArgument(b):
    """Inverse of encodeArgument(): decode a subprocess argument if needed."""
    return decodeFilename(b, True)
2272
2273
def decodeOption(optval):
    """Decode a command-line option value to text (no-op for None and str)."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2282
2283
def formatSeconds(secs):
    """Format a duration in seconds as H:MM:SS, M:SS or plain seconds.

    Boundary fix: the original used strict '>' comparisons, so exactly 3600
    seconds rendered as '60:00' and exactly 60 seconds as '60'; '>=' makes
    the boundaries roll over as expected ('1:00:00' and '1:00').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2291
2292
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate'
    option, degrading gracefully across the ssl APIs of different
    Python versions."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        # Manually build a TLS context with default CA paths
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2316
2317
def bug_reports_message():
    """Compose the standard bug-report footer appended to unexpected errors."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return (
        '; please report this issue on https://yt-dl.org/bug .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
    ) % update_cmd
2327
2328
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2332
2333
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-level failures are treated as expected: they are never
        # youtube-dl bugs, so no bug-report footer is added for them.
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the "please report" footer appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Returns None when no traceback was recorded at construction time
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2361
2362
class UnsupportedError(ExtractorError):
    """Raised when no extractor handles the given URL."""

    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2368
2369
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2373
2374
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Keep the raw message and the country list available to callers
        self.msg = msg
        self.countries = countries
        super(GeoRestrictedError, self).__init__(msg, expected=True)
2385
2386
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2399
2400
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
2408
2409
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2420
2421
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
2425
2426
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
2434
2435
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
2451
2452
class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended file attributes (xattrs).

    Classifies the underlying failure into `reason` ('NO_SPACE',
    'VALUE_TOO_LONG' or 'NOT_SUPPORTED') so callers can give targeted advice.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # Bug fix: the message check previously looked for the misspelling
        # 'Disk quota excedded', which can never match the actual EDQUOT
        # strerror text "Disk quota exceeded".
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2467
2468
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable xattr implementation is available."""
2471
2472
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate an HTTP(S) connection, optionally bound to the
    'source_address' configured in the handler's params."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Heuristic: a dotted source address means IPv4, otherwise IPv6;
            # only candidate addresses of the same family are usable.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the last error and try the next address
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2536
2537
def handle_youtubedl_headers(headers):
    """Strip youtube-dl internal headers before a real request is sent.

    'Youtubedl-no-compression' is an internal marker: when present, drop any
    Accept-Encoding header (so the response is not compressed) and remove
    the marker itself. Without the marker, the dict is returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict(
        (key, value) for key, value in headers.items()
        if key.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2546
2547
2548 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2549 """Handler for HTTP requests and responses.
2550
2551 This class, when installed with an OpenerDirector, automatically adds
2552 the standard headers to every HTTP request and handles gzipped and
2553 deflated responses from web servers. If compression is to be avoided in
2554 a particular request, the original request in the program code only has
2555 to include the HTTP header "Youtubedl-no-compression", which will be
2556 removed before making the real request.
2557
2558 Part of this code was copied from:
2559
2560 http://techknack.net/python-urllib2-handlers/
2561
2562 Andrew Rowls, the author of that code, agreed to release it to the
2563 public domain.
2564 """
2565
2566 def __init__(self, params, *args, **kwargs):
2567 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2568 self._params = params
2569
2570 def http_open(self, req):
2571 conn_class = compat_http_client.HTTPConnection
2572
2573 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2574 if socks_proxy:
2575 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2576 del req.headers['Ytdl-socks-proxy']
2577
2578 return self.do_open(functools.partial(
2579 _create_http_connection, self, conn_class, False),
2580 req)
2581
2582 @staticmethod
2583 def deflate(data):
2584 try:
2585 return zlib.decompress(data, -zlib.MAX_WBITS)
2586 except zlib.error:
2587 return zlib.decompress(data)
2588
    def http_request(self, req):
        """Prepare an outgoing request: percent-encode non-ASCII URLs, add
        the standard headers, and apply youtube-dl header handling."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Only add a default header if the request does not set it already
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments: strip the
            # fragment from the Request's private attributes directly
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req
2619
    def http_response(self, req, resp):
        """Post-process a response: transparently decompress gzip/deflate
        bodies and percent-encode non-ASCII redirect Location headers."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes trimmed off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp
2666
    # Apply the same request/response processing to HTTPS traffic
    https_request = http_request
    https_response = http_response
2669
2670
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through
    the SOCKS proxy described by the *socks_proxy* URL.

    Recognized schemes: socks5, socks, socks4, socks4a.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    # NOTE(review): an unrecognized scheme leaves socks_type unbound and
    # raises NameError below — presumably callers validate the scheme first;
    # TODO confirm

    def unquote_if_non_empty(s):
        # Username/password may be percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS connections, wrap the proxied socket in TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2712
2713
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and per-request
    SOCKS proxying via the internal Ytdl-socks-proxy header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_kwargs = {}
        connection = self._https_conn_class

        # python > 2.6: hand the SSL context down to the connection
        if hasattr(self, '_context'):
            conn_kwargs['context'] = self._context
        # python 3.x: propagate the hostname-checking setting
        if hasattr(self, '_check_hostname'):
            conn_kwargs['check_hostname'] = self._check_hostname

        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection = make_socks_conn_class(connection, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        opener = functools.partial(
            _create_http_connection, self, connection, True)
        return self.do_open(opener, req, **conn_kwargs)
2737
2738
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix curl/browsers use to mark HttpOnly cookies in cookies.txt
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields per cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.

'''
    # Field layout of one cookies.txt line
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one cookies.txt line (stripping the HttpOnly prefix);
            # raises LoadError on malformed entries so they can be skipped
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2855
2856
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that routes HTTPS requests/responses through the
    same cookie handling as plain HTTP (see aliases at the bottom)."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # (Workaround kept for reference but currently disabled:)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2879
2880
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler; on Python 2 it coerces the redirect URL to
    unicode before delegating to the stock handler."""
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2888
2889
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remainder): utc_offset is a datetime.timedelta;
    'Z', a missing designator and an unsigned designator all yield a zero
    offset.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign_str = m.group('sign')
    if not sign_str:
        return datetime.timedelta(), date_str
    direction = 1 if sign_str == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
2906
2907
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Drop fractional seconds; strptime below has no slot for them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        parsed = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        return None
    return calendar.timegm(parsed.timetuple())
2925
2926
def date_formats(day_first=True):
    """Return the table of date format strings to try, ordered for
    day-first or month-first locales."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2929
2930
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Commas never carry meaning in the supported formats
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NB: deliberately no break — a later matching format overwrites an
    # earlier one, preserving the original precedence
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
2957
2958
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or
    None; *day_first* selects the day-first vs month-first format table."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Note the PM marker before it is stripped below; applied as +12h later
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
2990
2991
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*; fall back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3003
3004
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by using '<lang>.<format>' as extension."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3007
3008
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        # Plain absolute date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # Months/years are approximated as 30/365 days
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3036
3037
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything not matching the expected shape passes through unchanged
    return '-'.join(m.groups()) if m is not None else date_str
3046
3047
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3077
3078
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # On some Python 2 setups this may come back as bytes
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3087
3088
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-runtime fds to Windows standard-handle IDs
    # (1 -> STD_OUTPUT_HANDLE, 2 -> STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on genuine console handles
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write runs of BMP characters in chunks of up to 1024; a non-BMP
        # character is written alone as its two UTF-16 code units
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3162
3163
def write_string(s, out=None, encoding=None):
    """Write the text string *s* to *out* (default: sys.stderr), coping
    with byte streams, Windows consoles and Python 2/3 differences."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows consoles, prefer the WriteConsoleW path (proper Unicode)
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3184
3185
def bytes_to_intlist(bs):
    """Convert a bytes/str sequence into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3 bytes already index to ints
        return list(bs)
    return [ord(ch) for ch in bs]
3193
3194
def intlist_to_bytes(xs):
    """Pack a list of integer byte values into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3199
3200
# Cross-platform file locking: define _lock_file/_unlock_file using the
# Win32 API on Windows and fcntl elsewhere, with a no-op-raising fallback.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the maximum lockable span of the file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 = LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3274
3275
class locked_file(object):
    """Context manager wrapping io.open() with an advisory file lock:
    shared for 'r', exclusive for 'a'/'w'."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Readers share the lock; writers need exclusivity
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3305
3306
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
3310
3311
def shell_quote(args):
    """Return *args* joined into a single shell-safe command line string."""
    encoding = get_filesystem_encoding()

    def as_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(as_text(a)) for a in args)
3321
3322
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into this URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    fragment = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + fragment
3331
3332
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): return (url, data), or (url, default)
    when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3340
3341
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. 1024 -> '1.00KiB'.

    Accepts int/float/str; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # Clamp so values >= 1024**9 don't raise IndexError on the suffix list
    exponent = min(exponent, len(suffixes) - 1)
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3354
3355
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' at the start of *s* using *unit_table*
    (unit -> multiplier); return the value in base units, or None."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # A decimal comma is accepted alongside a decimal point
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3365
3366
def parse_filesize(s):
    """Parse a human-readable file size like '5.5 MiB' into an int byte
    count via lookup_unit_table(), or return None."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3436
3437
def parse_count(s):
    """Parse a view/like count such as '1.2M' or '15,347' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Purely numeric (possibly with separators): delegate to str_to_int
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, s)
3457
3458
def parse_resolution(s):
    """Extract width/height from a resolution description such as
    '1920x1080', '720p' or '4k'; returns a (possibly empty) dict."""
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xXƗ]\s*(?P<h>\d+)\b', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(m.group(1)) * 540}

    return {}
3479
3480
def parse_bitrate(s):
    """Return the bitrate in kbps parsed from a string like '128 kbps',
    or None."""
    if not isinstance(s, compat_str):
        return
    m = re.search(r'\b(\d+)\s*kbps', s)
    if m:
        return int(m.group(1))
3487
3488
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return names.index(name) + 1
    except ValueError:
        return None
3498
3499
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [s[:3] for s in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3508
3509
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave already-escaped entities and numeric references untouched
    bare_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_amp, '&amp;', xml_str)
3516
3517
def setproctitle(title):
    """Best-effort: set the process title via libc prctl (glibc only);
    silently does nothing where unsupported."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 = PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3542
3543
def remove_start(s, start):
    """Strip the prefix *start* from *s* when present (None passes through)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3546
3547
def remove_end(s, end):
    """Strip the suffix *end* from *s* when present (None passes through)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3550
3551
def remove_quotes(s):
    """Strip one layer of matching single or double quotes around *s*."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3559
3560
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded)."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip('/').split('/')
    return segments[-1]
3564
3565
def base_url(url):
    """Return *url* up to and including its last path slash before any
    query/fragment (AttributeError when there is no such slash)."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3568
3569
def urljoin(base, path):
    """Join *base* and *path*; returns None unless *path* is a non-empty
    string and *base* looks like an http(s)/protocol-relative URL.
    Already-absolute paths are returned unchanged."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Absolute or protocol-relative already
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3583
3584
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that always uses the HTTP HEAD method
    def get_method(self):
        return 'HEAD'
3588
3589
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that always uses the HTTP PUT method
    def get_method(self):
        return 'PUT'
3593
3594
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (optionally reading attribute *get_attr* first),
    scaled by invscale/scale; return *default* on None/''/failure."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3607
3608
def str_or_none(v, default=None):
    """Coerce *v* to compat_str; return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
3611
3612
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousand separators and stray '+' signs before converting
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3620
3621
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float (rescaled by invscale/scale); *default* on
    None or conversion failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3629
3630
def bool_or_none(v, default=None):
    # Pass through genuine bools only; anything else (including 0/1) yields
    # *default*.
    if isinstance(v, bool):
        return v
    return default
3633
3634
def strip_or_none(v, default=None):
    # Strip surrounding whitespace from text values; non-strings yield *default*.
    return v.strip() if isinstance(v, compat_str) else default
3637
3638
def url_or_none(url):
    # Return the stripped URL when it looks absolute or protocol-relative
    # ('scheme://...' or '//...'); otherwise None.
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
3644
3645
def parse_duration(s):
    """Parse a duration string into seconds (float), or None.

    Three formats are tried in order:
      1. colon-separated clock values: [[[DD:]HH:]MM:]SS[.ms]
      2. ISO-8601-ish / verbose: '1d 2h 3min 4.5s', 'PT1H2M3S', ...
         (years/months/weeks are matched but ignored for the total)
      3. loose text: '1.5 hours' or '90 min'
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components matched; 'ms' keeps its leading dot, so
    # float(ms) already yields the fractional part.
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3702
3703
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the file's real extension, e.g.
    ('a.mp4', 'temp') -> 'a.temp.mp4'.  When *expected_real_ext* is given
    and does not match, *ext* is appended at the very end instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3710
3711
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file's extension with *ext*; when *expected_real_ext*
    is given and does not match, append *ext* instead of replacing."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3717
3718
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # None instead of a mutable [] default; behavior is unchanged, but the
    # default list can no longer be shared/mutated across calls.
    if args is None:
        args = []
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
3727
3728
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    # Version extraction itself is delegated to detect_exe_version().
    return detect_exe_version(out, version_re, unrecognized)
3746
3747
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from an executable's *output*.

    *version_re* must contain one capture group; when None, a generic
    'version <token>' pattern is used.  Returns *unrecognized* when
    nothing matches."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3757
3758
class PagedList(object):
    """Base class for lazily-paged result lists; subclasses provide
    getslice(start, end)."""
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3763
3764
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via *pagefunc*, optionally
    caching each page it has already fetched."""
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return results[start:end] as a list, fetching whole pages."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the requested start within this page (0 on later pages).
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the requested end, if the slice ends on this page.
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3815
3816
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return results[start:end] as a list, iterating whole pages."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Items to drop from the first page / total items still wanted.
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # Final page of the slice: trim and stop.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3844
3845
def uppercase_escape(s):
    """Expand literal \\UXXXXXXXX escape sequences in *s* into characters."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
3852
3853
def lowercase_escape(s):
    """Expand literal \\uXXXX escape sequences in *s* into characters."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
3860
3861
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, quote() needs a byte string.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # The safe-set keeps all RFC 3986 reserved/sub-delim characters intact.
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3867
3868
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # Hostname goes through IDNA; the other components are percent-escaped.
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
3879
3880
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping blanks and comment
    lines ('#', ';', ']'); *batch_fd* is closed when done."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # UTF-8 BOM bytes surviving as individual code points — presumably
        # from a file decoded with a single-byte encoding; strip them.
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3895
3896
def urlencode_postdata(*args, **kargs):
    # urlencode the arguments and return the result as ASCII bytes, ready
    # to be used as a POST body.
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3899
3900
def update_url_query(url, query):
    """Return *url* with the *query* dict merged into its query string
    (existing keys are overwritten)."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
3909
3910
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req* with updated url/data/headers/query, preserving the
    HTTP method (HEAD/PUT vs. default) and the timeout if one was set."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    # Pick the Request subclass that reproduces the original method.
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3929
3930
def _multipart_encode_impl(data, boundary):
    """Serialize *data* (dict of str/bytes keys and values) as a
    multipart/form-data body with the given *boundary*.

    Returns (body_bytes, content_type); raises ValueError when the
    boundary occurs inside a part."""
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    parts = []
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = (b'Content-Disposition: form-data; name="' + name
                + b'"\r\n\r\n' + value + b'\r\n')
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        parts.append(b'--' + boundary_bytes + b'\r\n' + part)

    parts.append(b'--' + boundary_bytes + b'--\r\n')
    return b''.join(parts), content_type
3951
3952
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    boundary_was_given = boundary is not None

    # Retry with fresh random boundaries until none collides with the data;
    # a caller-supplied boundary is never replaced, so collisions propagate.
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if boundary_was_given:
                raise
            boundary = None
3981
3982
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key or try several in order, returning the first value
    that is not None (and, unless skip_false_values is False, truthy)."""
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key in d:
            value = d[key]
            if value is not None and not (skip_false_values and not value):
                return value
    return default
3991
3992
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to *src* and return the first result that
    does not raise a lookup error (and matches *expected_type* when given);
    None otherwise."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for g in getters:
        try:
            value = g(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4004
4005
def merge_dicts(*dicts):
    """Merge dicts left to right; None values never overwrite, and a later
    non-empty string may replace an earlier empty string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            keep_existing = (
                k in merged
                and not (isinstance(v, compat_str) and v
                         and isinstance(merged[k], compat_str)
                         and not merged[k]))
            if not keep_existing:
                merged[k] = v
    return merged
4018
4019
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    # Return *string* unchanged when it is already text; otherwise decode it
    # with *encoding*.
    # NOTE(review): preferredencoding() is evaluated once, when the def
    # statement runs — presumably intentional since the locale does not
    # change at runtime; confirm if the module is imported before locale setup.
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4022
4023
# US movie ratings mapped to the minimum age used for --age-limit checks.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4031
4032
# US TV parental guideline labels mapped to minimum ages.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4041
4042
def parse_age_limit(s):
    """Normalize an age-limit value (plain int, 'NN+', US movie rating or
    TV parental guideline label) to an int age, or None."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    # Accept 'TV-14', 'TV_14' and bare 'TV14' spellings.
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4057
4058
def strip_jsonp(code):
    # Unwrap a JSONP response ('func(...)' or 'window.func && func(...);',
    # possibly followed by // comments) down to the bare JSON payload.
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4067
4068
def js_to_json(code):
    """Convert a JavaScript object literal into JSON text: strips comments,
    normalizes quoting, removes trailing commas and converts hex/octal
    integer keys/values to decimal."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one token matched by the big regex below.
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Re-escape string contents for JSON double-quoting.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifier: quote it.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4108
4109
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4118
4119
# Default output filename template: "<title>-<id>.<ext>".
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4121
4122
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4131
4132
def version_tuple(v):
    # Split on '.' and '-' so '2020.09.14-1' becomes (2020, 9, 14, 1).
    return tuple(map(int, re.split(r'[-.]', v)))
4135
4136
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; when *version* is missing or
    unparsable, *assume_new* decides the answer."""
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
    return outdated
4144
4145
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updatable when running from a zipimport-loaded bundle or a frozen
    # (e.g. py2exe-style) binary.
    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4151
4152
def args_to_str(args):
    """Shell-quote and join *args* for display purposes."""
    # Get a short string representation for a subprocess command
    return ' '.join(compat_shlex_quote(a) for a in args)
4156
4157
def error_to_compat_str(err):
    """Stringify an exception safely on both Python 2 and 3."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4165
4166
def mimetype2ext(mt):
    """Map a MIME type to a conventional file extension.

    Returns None for None input; unknown subtypes fall back to the
    (normalized) subtype string itself."""
    if mt is None:
        return None

    # Full-type special cases take priority.
    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    ext = full_type_map.get(mt)
    if ext is not None:
        return ext

    # Normalize the subtype: drop parameters, whitespace and case.
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()

    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return subtype_map.get(subtype, subtype)
4203
4204
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    http://tools.ietf.org/html/rfc6381
    Returns {} for empty input or when nothing could be classified; when
    exactly two unrecognized codecs are present they are assumed to be
    video and audio, in that order.
    """
    if not codecs_str:
        return {}
    # Comprehension instead of filter(None, map(lambda str: ...)) — the old
    # lambda's parameter shadowed the builtin ``str``.
    split_codecs = [
        c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # Classify by the first dotted component (e.g. 'avc1' of 'avc1.64001f').
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
4234
4235
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a response: prefer the filename in
    Content-Disposition, then fall back to the Content-Type mapping."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
4248
4249
def encode_data_uri(data, mime_type):
    # Build an RFC 2397 'data:' URI with a base64 payload.
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4252
4253
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit configured, or content available for everyone.
        return False
    return age_limit < content_limit
4262
4263
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Decode according to a recognized BOM, defaulting to UTF-8.
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4282
4283
def determine_protocol(info_dict):
    """Infer the download protocol for *info_dict*: explicit 'protocol'
    field first, then URL prefix, then extension, then URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
4304
4305
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Left-justify every column (except the last) to its widest cell + 1.
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
4312
4313
def _match_one(filter_part, dct):
    """Evaluate one --match-filter clause (e.g. 'duration > 60', 'id = x'
    or '!is_live') against *dct*; raises ValueError on unparsable clauses."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind as the enclosing ones.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow suffixed sizes like '500k' or '1.5MiB'.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field passes only when the '?' suffix was present.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4382
4383
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # Clauses are separated by '&' and must all hold.
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4389
4390
def match_filter_func(filter_str):
    """Build a match-filter callback: returns None when the info dict
    passes *filter_str*, else a human-readable skip message."""
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4399
4400
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression ('12.5s' or 'HH:MM:SS[.f]') into
    seconds as a float; None for empty/unrecognized input."""
    if not time_expr:
        return None

    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, mins, secs = m.groups()
        # A ':' before the fraction is treated like a decimal point.
        return 3600 * int(hours) + 60 * int(mins) + float(secs.replace(':', '.'))
4412
4413
def srt_subtitles_timecode(seconds):
    # Format a float second count as an SRT timecode: HH:MM:SS,mmm.
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4416
4417
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Legacy TTML namespaces are rewritten to the current ones up front so a
    # single set of xpath helpers covers every input flavor.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Only these TTML style properties are translated into SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # Streaming target for ElementTree's XMLParser: renders one <p>
        # subtree as text with <b>/<i>/<u>/<font> markup.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the enclosing
                        # element to avoid redundant nested tags.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize one <p> element through TTMLPElementParser.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; a style referencing a not-yet-seen parent
    # forces another full pass until everything is resolved.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4580
4581
def cli_option(params, command_option, param):
    """Return [command_option, value] for params[param], or [] when unset.

    The value is now always stringified; previously only truthy values
    were converted, so falsy non-None values (e.g. 0) leaked into the
    argument list as non-strings.
    """
    param = params.get(param)
    if param is None:
        return []
    return [command_option, compat_str(param)]
4587
4588
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as command-line arguments; [] when unset.

    With *separator*, emits a single '--opt<sep>value' token; otherwise
    the option and the value as two tokens."""
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4597
4598
def cli_valueless_option(params, command_option, param, expected_value=True):
    # Emit the bare option only when the parameter equals *expected_value*.
    if params.get(param) == expected_value:
        return [command_option]
    return []
4602
4603
def cli_configuration_args(params, param, default=[]):
    # Fetch a list-valued option from *params*; *default* when unset.
    ex_args = params.get(param)
    if ex_args is not None:
        assert isinstance(ex_args, list)
        return ex_args
    return default
4610
4611
class ISO639Utils(object):
    """Static ISO 639-1 <-> ISO 639-2/T language-code conversion table."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are significant.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear reverse lookup; returns None (implicitly) when unknown.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4815
4816
class ISO3166Utils(object):
    """Map ISO 3166-1 alpha-2 country codes to English country names."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Ɨland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'CĆ“te d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'CuraƧao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'RĆ©union',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint BarthĆ©lemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Lookup is case-insensitive; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5075
5076
class GeoUtils(object):
    """Helpers for faking a geographic location via IP address selection."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (str) inside the given CIDR block.

        `code_or_block` is either a 2-letter country code (looked up in
        _country_ip_map; returns None when unknown) or an explicit
        'a.b.c.d/prefixlen' block.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Lowest address of the block as a 32-bit integer; the map stores
        # network base addresses, so OR-ing in the host bits yields the
        # highest address of the block.
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5335
5336
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets each request override the proxy via
    the internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers.  The lambda's default arguments bind `type`
        # and `meth` at definition time (avoiding the late-binding closure
        # pitfall) and default the proxy to the '__noproxy__' sentinel.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy, if present, takes precedence over the default;
        # the internal header is stripped so it is never sent on the wire.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers take care of wrapping the
            # socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5360
5361
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Emit the value as big-endian 32-bit words, most significant word first.
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    s = b''.join(reversed(words))
    # Strip the leading NUL bytes introduced by whole-word packing; keep a
    # single NUL for n == 0 (where no word was emitted at all).
    s = s.lstrip(b'\000') or b'\000'
    # Re-pad the front so the total length is a multiple of blocksize.
    if blocksize > 0:
        remainder = len(s) % blocksize
        if remainder:
            s = b'\000' * (blocksize - remainder) + s
    return s
5394
5395
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with NULs to a multiple of 4 bytes so the input can be
    # consumed as whole big-endian 32-bit words.
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s
    acc = 0
    for offset in range(0, len(s), 4):
        acc = (acc << 32) | compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return acc
5411
5412
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the byte-reversed data as a big-endian hex number, i.e. the
    # original data as a little-endian integer.
    payload = int(binascii.hexlify(data[::-1]), 16)
    # Textbook RSA: ciphertext = payload ** exponent mod modulus,
    # rendered as lowercase hex without leading zeros.
    return '%x' % pow(payload, exponent, modulus)
5428
5429
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit in length with 11 bytes overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # EME-PKCS1-v1_5 (RFC 8017): 0x00 0x02 <PS> 0x00 <data>, where PS is a
    # pseudo-random string of *nonzero* octets -- a zero octet would be taken
    # for the separator by the decoder, so draw from [1, 255] (the previous
    # randint(0, 254) could occasionally emit an invalid zero byte).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5443
5444
def encode_base_n(num, n, table=None):
    """Render non-negative integer *num* in base *n*, using *table* as the
    digit alphabet (defaults to 0-9a-zA-Z truncated to n symbols)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5461
5462
def decode_packed_codes(code):
    """Unscramble JavaScript obfuscated with Dean Edwards' packer by mapping
    each base-N word token back to the symbol it replaced."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Token -> symbol; an empty slot in the symbol list means the token
    # stands for itself.
    symbol_table = {}
    for index in range(count - 1, -1, -1):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5479
5480
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by *shift*
    positions (with wrap-around); characters outside *alphabet* pass
    through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)
    rotated = []
    for ch in s:
        if ch in alphabet:
            rotated.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            rotated.append(ch)
    return ''.join(rotated)
5488
5489
def rot47(s):
    """Apply the ROT47 cipher: rotate the 94 printable ASCII characters
    '!'..'~' by 47 positions (its own inverse)."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5492
5493
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict, stripping the surrounding quotes from quoted values."""
    info = {}
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
5501
5502
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value -- the equivalent of
    JavaScript's ``>>>`` operator for Python's arbitrary-precision ints."""
    if val >= 0:
        return val >> n
    # Map a negative value onto its 32-bit two's-complement representation
    # before shifting so zeros are shifted in from the left.
    return (val + 0x100000000) >> n
5505
5506
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels), where pixels
    is a list of rows of channel byte values.

    NOTE(review): the fixed 3-bytes-per-pixel stride implies this assumes
    8-bit RGB with no interlacing or palette -- confirm against callers.
    Raises IOError for a bad signature or missing image data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the 8-byte PNG signature and that the first chunk is IHDR.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Read 1-, 2- or 4-byte big-endian unsigned integers.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR was verified to be the first chunk; width and height are its
    # first two 4-byte fields.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate all IDAT chunks: together they form one zlib stream.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Each scanline is one filter-type byte followed by `stride` channel
    # bytes (3 bytes per pixel).
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Fetch an already-reconstructed channel byte by flat index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # "left" is the same channel of the previous pixel (3 bytes
            # back); "up" is the same channel one scanline above.  Both
            # default to 0 at the image edges, per the PNG spec.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter; type 0 (None) needs no work.
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # "c" is the upper-left neighbour (one row up, 3 bytes back).
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the neighbour closest to the initial estimate p,
                # breaking ties in the order a, b, c (PNG Paeth predictor).
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5612
5613
def write_xattr(path, key, value):
    """Set extended attribute `key` to the bytes `value` on file `path`.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate Data
    Streams on Windows, and the setfattr/xattr command-line tools.  Raises
    XAttrUnavailableError when no usable backend exists and
    XAttrMetadataError when the chosen backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # ':' is the ADS separator, so it must not occur in the key.
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Fall back to the CLI tools shipped by the OS/package manager.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5696
5697
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the stringified
    year/month/day of a uniformly random date between 1950-01-01 and
    1995-12-31 (inclusive)."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    random_date = earliest + datetime.timedelta(
        days=random.randint(0, (latest - earliest).days))
    return {
        field: str(value)
        for field, value in (
            (year_field, random_date.year),
            (month_field, random_date.month),
            (day_field, random_date.day),
        )
    }