Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
New upstream version 2020.06.16
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_unquote_plus,
64 compat_urllib_request,
65 compat_urlparse,
66 compat_xpath,
67 )
68
69 from .socks import (
70 ProxyType,
71 sockssocket,
72 )
73
74
def register_socks_protocols():
    """Add the SOCKS proxy schemes to ``compat_urlparse.uses_netloc``.

    Each of ``socks``, ``socks4``, ``socks4a`` and ``socks5`` is appended to
    the list only when it is not already present, so calling this function
    more than once does not create duplicate entries.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
82
83
# This is not clearly defined otherwise: take the class of compiled patterns
# from an actual compiled (empty) pattern so it can be used in isinstance checks.
compiled_regex_type = type(re.compile(''))
86
87
def random_user_agent():
    """Return a Chrome-on-Windows User-Agent string with a random version.

    The Chrome version is drawn uniformly with ``random.choice`` from the
    fixed tuple of version strings below and substituted into the
    User-Agent template.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
# Default HTTP request headers.  YoutubeDLHandler.http_request() adds each of
# these to an outgoing request that does not already carry that header.
# The User-Agent value is chosen once, when this module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel meaning "no default was supplied".  It is compared by
# identity (``is not NO_DEFAULT``) so that None stays usable as a real default.
NO_DEFAULT = object()
1686
# Full month names, January first (index 0 == January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, in the same January-first order.
# The French names contain non-ASCII characters; the file is UTF-8 encoded
# (see the coding declaration at the top of the file).
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1697
# File extensions (without the leading dot) recognised as media or manifest
# formats; each row groups related container/codec families.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1712
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement.  The key string and
# the chained replacement sequence are zipped position by position, so both
# must stay the same length (68 entries); multi-letter replacements such as
# 'AE' or 'ss' are passed as one-element lists so they count as one item.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1717
# strptime-style date/time patterns that do not depend on day/month order.
# The ambiguous all-numeric orders are added separately in
# DATE_FORMATS_DAY_FIRST and DATE_FORMATS_MONTH_FIRST.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    # English ordinal suffixes are spelled out literally (1st, 2nd, 3rd, 4th)
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    # fraction followed by a literal '0' (presumably for 7-digit fractions
    # ending in zero -- TODO confirm against the input that needed it)
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1756
# DATE_FORMATS plus the numeric patterns that read the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# DATE_FORMATS plus the numeric patterns that read the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1775
# Matches the tail of a call of the form  }('<payload>',<int>,<int>,'<a|b|c>'.split('|')
# and captures the payload, the two integers and the '|'-separated word list
# (presumably the argument list of packed/obfuscated JavaScript -- TODO confirm).
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive,
# dot matches newlines); the script body is captured as group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1778
1779
def preferredencoding():
    """Return the name of a usable preferred text encoding.

    Asks locale.getpreferredencoding() and verifies that the reported codec
    can actually encode text; if the lookup or the probe fails for any
    reason, 'UTF-8' is returned instead.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken encoding name raises here
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1793
1794
def write_json_file(obj, fn):
    """Encode obj as JSON and write it to fn, atomically if possible.

    The data is first written to a temporary file created in the same
    directory as fn, which is then renamed over fn.  If anything goes wrong
    the temporary file is removed (best effort) and the error is re-raised.
    """
    fn = encodeFilename(fn)
    is_py2 = sys.version_info < (3, 0)

    tmp_prefix = os.path.basename(fn)
    tmp_dir = os.path.dirname(fn)
    if is_py2 and sys.platform != 'win32':
        # os.path returns bytes objects here, but NamedTemporaryFile fails
        # on names containing non-ASCII characters unless it is given
        # unicode objects
        fs_encoding = get_filesystem_encoding()
        tmp_prefix = tmp_prefix.decode(fs_encoding)
        tmp_dir = tmp_dir.decode(fs_encoding)

    tmp_options = {
        'suffix': '.tmp',
        'prefix': tmp_prefix + '.',
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if is_py2:
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # An existing target must be removed on Windows, else os.rename
            # raises WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set it to 0 and
            # restore it straight away; then give the file the permissions
            # a regularly created file would get (0o666 minus the umask).
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind, then propagate the error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1853
1854
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Return the first element matching xpath[@key] or xpath[@key='val'].

        key must consist of ASCII letters, '_' and '-' only (asserted).
        Returns None when nothing matches.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """Manual attribute filter for interpreters older than Python 2.7."""
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1869
1870 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1871 # the namespace parameter
1872
1873
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    ns_map maps each prefix to its namespace URI.  Steps without a prefix
    are copied unchanged.  An unknown prefix raises KeyError.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([_expand(step) for step in path.split('/')])
1884
1885
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    xpath is either a single xpath string or an iterable of xpath strings
    that are tried in order; the first match wins.

    When nothing matches:
      * default is returned if one was given (anything but NO_DEFAULT),
      * otherwise ExtractorError is raised if fatal is true (name, or the
        xpath itself when name is None, is used in the message),
      * otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty sequence of xpaths is handled as
        # "not found" rather than failing with UnboundLocalError below
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1907
1908
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    Whatever xpath_element() returns for a missing element (None or the
    default) is passed through.  If the element exists but has no text,
    the same default / fatal / None rules are applied to the text.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
1922
1923
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath[@key].

    When no such element exists: default is returned if one was given,
    otherwise ExtractorError is raised if fatal is true, otherwise None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1935
1936
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is an attribute lookup on 'id'
    content = get_element_by_attribute('id', id, html)
    return content
1940
1941
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1946
1947
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may hold several names, so match class_name as a
    # whole word anywhere inside the (unquoted part of the) attribute value
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1958
1959
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the contents of all tags with the specified attribute value as a list.

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression.  Each returned
    content string has its HTML entities unescaped.
    """
    value_re = re.escape(value) if escape_value else value

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value_re)

    def _content_of(match):
        content = match.group('content')
        # Content that starts with a quote loses its first and last character
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        return unescapeHTML(content)

    return [_content_of(m) for m in re.finditer(element_re, html)]
1983
1984
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        # Attributes of the most recently seen start tag (empty until one is parsed)
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Called by the parser for each start tag; attrs is a sequence of
        # (name, value) pairs.  A later start tag replaces an earlier one.
        self.attrs = dict(attrs)
1993
1994
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    If the parser raises compat_HTMLParseError, whatever attributes were
    collected up to that point (possibly none) are returned.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs
2019
2020
def clean_html(html):
    """Clean an HTML snippet into a readable string.

    Source newlines become spaces, <br> tags and </p><p> boundaries become
    newlines, all remaining tags are dropped and HTML entities are decoded.
    None is passed through unchanged.
    """
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    text = html.replace('\n', ' ')
    substitutions = (
        # Newline vs <br />
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
2036
2037
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' selects standard output (its binary buffer when available; on
    Windows stdout is switched to binary mode first).  If opening fails
    with anything but EACCES, the name is passed through sanitize_path()
    and, if that changed it, opened again; errors from that second attempt
    propagate to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = getattr(sys.stdout, 'buffer', sys.stdout)
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # Permission problems are not fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2068
2069
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None if the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2077
2078
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def _clean_char(ch):
        # Returns the replacement (possibly empty or multi-character) for ch
        if restricted and ch in ACCENT_CHARS:
            return ACCENT_CHARS[ch]
        code = ord(ch)
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (
                ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    # Handle timestamps: keep 12:34:56 readable as 12_34_56
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    cleaned = ''.join([_clean_char(ch) for ch in s])
    if is_id:
        return cleaned

    # Collapse runs of underscores and trim them from both ends
    cleaned = re.sub(r'_{2,}', '_', cleaned).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if cleaned.startswith('-'):
        cleaned = '_' + cleaned[1:]
    cleaned = cleaned.lstrip('.')
    return cleaned if cleaned else '_'
2118
2119
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows.

    On every other platform s is returned unchanged.  On Windows the path
    is normalized and, in each component except '.' and '..', characters
    that are not allowed in file names (and a trailing space or dot) are
    replaced with '#'.  A drive or UNC prefix is preserved as is.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc = os.path.splitunc(s)[0]

    def _sanitize_part(part):
        if part in ('.', '..'):
            return part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component left in front of the first separator
        parts.pop(0)
    sanitized = [_sanitize_part(part) for part in parts]
    if drive_or_unc:
        sanitized.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized)
2136
2137
def sanitize_url(url):
    """Repair a few common URL defects and return the resulting URL.

    Protocol-relative URLs ('//host/...') get an 'http:' scheme, and known
    scheme typos are corrected.  Any other URL is returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Every pattern is anchored at the start, so a substitution count
        # above zero means exactly "the URL started with this typo"
        fixed, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed
    return url
2154
2155
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after passing it through sanitize_url().

    All further positional and keyword arguments are handed to Request as is.
    """
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2158
2159
def expand_path(s):
    """Expand shell variables and ~"""
    # The home directory is expanded first, environment variables second
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2163
2164
def orderedSet(iterable):
    """Return a list of the items of iterable without duplicates.

    The first occurrence of each item is kept, in original order.  Items
    are compared with ==, so they do not need to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2172
2173
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with
    the trailing ';' (e.g. 'amp;' or '#x2F;').  Named entities, HTML5 named
    entities and decimal/hexadecimal numeric references are decoded; anything
    else, including numeric references outside the valid code point range,
    is returned in its literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # Out-of-range code points raise ValueError; numbers too large for
        # a C integer raise OverflowError instead.  Both mean "not a valid
        # character", so fall through to the literal representation.
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2203
2204
def unescapeHTML(s):
    """Return s with its HTML entities ('&name;', '&#NN;', '&#xHH;') decoded.

    None is passed through.  s must be exactly of type compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # group(1) is the entity without '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
2212
2213
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and file names.

    On Windows (major version 5 and up) this is the preferred locale
    encoding, elsewhere the filesystem encoding; 'utf-8' is used when the
    chosen source reports None.
    """
    on_modern_windows = (
        sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2224
2225
def encodeFilename(s, for_subprocess=False):
    """Return s in the form expected by filesystem (or subprocess) APIs.

    @param s The name of the file
    @param for_subprocess Whether the result is meant for a subprocess call

    The text is returned unchanged wherever a Unicode API is available;
    otherwise it is encoded with get_subprocess_encoding(), silently
    dropping characters that cannot be encoded.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    windows_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if windows_unicode_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2248
2249
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): turn a byte-string file name into text.

    On Python 3, and for anything that is not a bytes object, b is returned
    unchanged.  Undecodable bytes are silently dropped.
    """
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2259
2260
def encodeArgument(s):
    """Encode a command line argument for a subprocess call.

    Text goes through encodeFilename(..., for_subprocess=True); anything
    else is assumed to be an ASCII byte string and is decoded first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2268
2269
def decodeArgument(b):
    """Decode a subprocess argument; counterpart of encodeArgument()."""
    decoded = decodeFilename(b, True)
    return decoded
2272
2273
def decodeOption(optval):
    """Return an option value as text.

    None is passed through, bytes are decoded with the preferred encoding,
    and the result is asserted to be a compat_str.
    """
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())
        assert isinstance(optval, compat_str)
    return optval
2282
2283
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: hours appear from 3600 seconds
    on and minutes from 60 seconds on, so exactly one hour is '1:00:00'
    and exactly one minute is '1:00'.
    """
    # The comparisons are inclusive: with a strict '>' the boundary values
    # 3600 and 60 would be rendered as '60:00' and '60'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2291
2292
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    params is the options mapping; its 'nocheckcertificate' entry (default
    False) disables certificate and host name verification.  Extra keyword
    arguments are passed on to YoutubeDLHTTPSHandler.

    Three strategies are tried, newest API first; the order matters because
    each later branch is the fallback for interpreters lacking the earlier one.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname is cleared before verify_mode is relaxed
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # No SSL context is passed on these versions
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2316
2317
def bug_reports_message():
    """Return the text appended to error messages that look like bugs.

    It asks the user to report the issue, to update first (how depends on
    ytdl_is_updateable()) and to include --verbose output.
    """
    update_cmd = (
        'type  youtube-dl -U  to update' if ytdl_is_updateable()
        else 'see  https://yt-dl.org/update  on how to update')
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
2327
2328
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The other error classes in this module derive from it.
    """
2332
2333
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """Build the final message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if truthy, is appended to the message; video_id, if given, is
        prepended.  Unexpected errors get the bug report instructions appended.
        """
        # Network errors and unavailable videos being handled right now are
        # never treated as bugs
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None."""
        tb = self.traceback
        return None if tb is None else ''.join(traceback.format_tb(tb))
2361
2362
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        # The offending URL, kept for callers
        self.url = url
2368
2369
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match.

    Adds nothing to ExtractorError; it exists as a distinct type.
    """
2373
2374
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always an expected error.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # The message as passed in, without any additions made by the base class
        self.msg = msg
        # Stored as given (None when not supplied)
        self.countries = countries
2385
2386
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """Store msg as the error message.

        exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info()).
        """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2399
2400
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    It adds nothing to YoutubeDLError.
    """
2408
2409
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # The message is also exposed as an attribute
        self.msg = msg
2420
2421
class MaxDownloadsReached(YoutubeDLError):
    """--max-downloads limit has been reached.

    It adds nothing to YoutubeDLError.
    """
2425
2426
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    It adds nothing to YoutubeDLError.
    """
2434
2435
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2451
2452
class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended-attribute metadata.

    code is an errno value (or None) and msg the error text.  From these a
    coarse reason is derived:
      'NO_SPACE'        no space left / disk quota exceeded
      'VALUE_TOO_LONG'  the value did not fit
      'NOT_SUPPORTED'   anything else
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # The standard message is spelled 'Disk quota exceeded'; the
        # historical misspelling is still accepted for compatibility.
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg
                or 'Disk quota exceeded' in self.msg
                or 'Disk quota excedded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2467
2468
class XAttrUnavailableError(YoutubeDLError):
    """Extended attributes cannot be used.

    It adds nothing to YoutubeDLError; it is raised by code outside this
    part of the file.
    """
2471
2472
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and make it honour the 'source_address' option.

    ydl_handler supplies the options via its _params mapping, http_class is
    the connection class to instantiate with *args/**kwargs, and is_https
    tells the Python 2.6 fallback whether to wrap the socket in TLS.
    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4, otherwise IPv6 is assumed
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            # Keep only remote addresses of the same family as the source address
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the half-open socket
                    err = _
                    if sock is not None:
                        sock.close()
            # Every candidate failed: re-raise the last error, if there was one
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the (host, port) pair passed to bind()
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() on this one instance
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2536
2537
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to headers.

    Without the marker the very same headers object is returned.  With it,
    a new dict is returned that lacks both the marker and any
    Accept-Encoding header (matched case-insensitively); the input mapping
    is not modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    kept = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del kept['Youtubedl-no-compression']
    return kept
2546
2547
2548 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2549 """Handler for HTTP requests and responses.
2550
2551 This class, when installed with an OpenerDirector, automatically adds
2552 the standard headers to every HTTP request and handles gzipped and
2553 deflated responses from web servers. If compression is to be avoided in
2554 a particular request, the original request in the program code only has
2555 to include the HTTP header "Youtubedl-no-compression", which will be
2556 removed before making the real request.
2557
2558 Part of this code was copied from:
2559
2560 http://techknack.net/python-urllib2-handlers/
2561
2562 Andrew Rowls, the author of that code, agreed to release it to the
2563 public domain.
2564 """
2565
def __init__(self, params, *args, **kwargs):
    """Initialise the base HTTPHandler and keep the options mapping.

    params is stored as self._params; _create_http_connection() reads the
    'source_address' option from it.  Remaining arguments go to HTTPHandler.
    """
    compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
    self._params = params
2569
def http_open(self, req):
    """Open req over HTTP, through a SOCKS proxy if the request asks for one.

    A proxy is requested with the internal 'Ytdl-socks-proxy' header, which
    is removed from the request before it is sent.
    """
    connection_class = compat_http_client.HTTPConnection

    proxy = req.headers.get('Ytdl-socks-proxy')
    if proxy:
        connection_class = make_socks_conn_class(connection_class, proxy)
        del req.headers['Ytdl-socks-proxy']

    # False: this is the plain HTTP (non-TLS) handler
    connection_factory = functools.partial(
        _create_http_connection, self, connection_class, False)
    return self.do_open(connection_factory, req)
2581
2582 @staticmethod
2583 def deflate(data):
2584 try:
2585 return zlib.decompress(data, -zlib.MAX_WBITS)
2586 except zlib.error:
2587 return zlib.decompress(data)
2588
def http_request(self, req):
    """Pre-process an outgoing request and return the request to send.

    The URL is percent-encoded if needed, missing standard headers are
    added, the internal no-compression marker is applied and, on
    Python 2.6, URL fragments are stripped.  The returned request may be a
    new object rather than req itself.
    """
    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
    # the code of this workaround has been moved here from YoutubeDL.urlopen()
    url = req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        req = update_Request(req, url=url_escaped)

    # Add every standard header the request does not already carry
    for h, v in std_headers.items():
        # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
        # The dict keys are capitalized because of this bug by urllib
        if h.capitalize() not in req.headers:
            req.add_header(h, v)

    # Honour and remove the internal 'Youtubedl-no-compression' marker
    req.headers = handle_youtubedl_headers(req.headers)

    if sys.version_info < (2, 7) and '#' in req.get_full_url():
        # Python 2.6 is brain-dead when it comes to fragments
        # (the name-mangled private attributes of Request are cut at the '#')
        req._Request__original = req._Request__original.partition('#')[0]
        req._Request__r_type = req._Request__r_type.partition('#')[0]

    return req
2619
def http_response(self, req, resp):
    """Post-process a response before it reaches the caller.

    Bodies marked ``Content-encoding: gzip`` or ``deflate`` are read fully,
    decompressed and wrapped in a new addinfourl object (the
    ``Content-encoding`` header is removed).  For 3xx responses the
    ``Location`` header is re-decoded as UTF-8 and percent-encoded when
    escaping changes it.  Returns the (possibly replaced) response.
    """
    old_resp = resp
    # gzip
    if resp.headers.get('Content-encoding', '') == 'gzip':
        content = resp.read()
        gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            uncompressed = io.BytesIO(gz.read())
        except IOError as original_ioerror:
            # There may be junk at the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            # Retry with 1..1023 trailing bytes chopped off; the first
            # length that decompresses cleanly wins.
            for i in range(1, 1024):
                try:
                    gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                    uncompressed = io.BytesIO(gz.read())
                except IOError:
                    continue
                break
            else:
                # No truncation helped: report the original failure.
                raise original_ioerror
        resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
        resp.msg = old_resp.msg
        del resp.headers['Content-encoding']
    # deflate
    if resp.headers.get('Content-encoding', '') == 'deflate':
        gz = io.BytesIO(self.deflate(resp.read()))
        resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
        resp.msg = old_resp.msg
        del resp.headers['Content-encoding']
    # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
    # https://github.com/ytdl-org/youtube-dl/issues/6457).
    if 300 <= resp.code < 400:
        location = resp.headers.get('Location')
        if location:
            # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
            if sys.version_info >= (3, 0):
                location = location.encode('iso-8859-1').decode('utf-8')
            else:
                location = location.decode('utf-8')
            location_escaped = escape_url(location)
            if location != location_escaped:
                del resp.headers['Location']
                if sys.version_info < (3, 0):
                    # Python 2 gets the header back as an encoded byte string.
                    location_escaped = location_escaped.encode('utf-8')
                resp.headers['Location'] = location_escaped
    return resp
2666
2667 https_request = http_request
2668 https_response = http_response
2669
2670
def make_socks_conn_class(base_class, socks_proxy):
    """Create a connection class that tunnels ``base_class`` through SOCKS.

    base_class must be (a subclass of) HTTPConnection or HTTPSConnection.
    socks_proxy is the proxy URL; its scheme selects the protocol
    (``socks5``, ``socks``/``socks4`` or ``socks4a``), the port defaults
    to 1080 and user name / password are unquoted when present.

    Raises ValueError for any other scheme (previously this surfaced as
    an UnboundLocalError further down).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % scheme)

    def unquote_if_non_empty(s):
        # Leave None / '' untouched so absent credentials stay absent.
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket.
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2712
2713
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler with a pluggable connection class and SOCKS support."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open ``req`` over HTTPS, through a SOCKS proxy when requested."""
        # Forward whichever SSL settings this Python's HTTPSHandler keeps.
        open_kwargs = {}
        for attr_name, kwarg_name in (
                ('_context', 'context'),  # python > 2.6
                ('_check_hostname', 'check_hostname')):  # python 3.x
            if hasattr(self, attr_name):
                open_kwargs[kwarg_name] = getattr(self, attr_name)

        connection_class = self._https_conn_class
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2737
2738
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Write the jar to a cookies.txt style file.

        Largely follows CPython 3.8's MozillaCookieJar.save, adapted so
        that UTF-8 cookie files work on both python 2 and 3.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Session cookies are written with `expires` 0 rather than an
        # empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if cookie.discard and not ignore_discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt treats 'Set-Cookie: foo' as a nameless
                    # cookie, http.cookiejar as a valueless one.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so the entry parses like any other.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            columns = line.split('\t')
            if len(columns) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(columns))
            entry = self._CookieFileEntry(*columns)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Collect the acceptable lines in memory, then let the base class
        # parse that sanitised copy.
        sanitized = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    sanitized.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        sanitized.seek(0)
        self._really_load(sanitized, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as
        # session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2855
2856
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also handles HTTPS requests and responses.

    ``http_response`` currently just delegates to the base class; the
    Set-Cookie escaping workaround described below is commented out.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the implementation of that workaround is disabled:
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS traffic.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2879
2880
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that overrides ``redirect_request`` on Python 2 only."""

    # The override is defined only when running under Python 2.
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2888
2889
def extract_timezone(date_str):
    """Split a trailing ``Z`` or ``+HH:MM`` / ``-HHMM`` offset off a date.

    Returns ``(offset, remainder)`` where ``offset`` is a timedelta (zero
    when no offset or ``Z`` is found) and ``remainder`` is ``date_str``
    without the recognised suffix.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain "Z" designator: UTC.
        return datetime.timedelta(), remainder

    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
2906
2907
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    Fractional seconds are discarded.  When ``timezone`` is not supplied
    it is taken from the string itself.  Returns None for ``None`` input
    or when the string does not match the expected layout.
    """
    if date_str is None:
        return None

    without_fraction = re.sub(r'\.[0-9]+', '', date_str)
    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_moment = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
2925
2926
def date_formats(day_first=True):
    """Return the day-first or month-first collection of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2929
2930
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas become spaces; AM/PM markers (plus a following timezone
    # name) and a numeric UTC offset are dropped.
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; a later matching format overrides an earlier one.
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style parsing.
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
2957
2958
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a loosely formatted date string.

    Returns None for ``None`` input or when nothing can be parsed.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # Any "PM" in the text shifts the result by twelve hours.
    if re.search(r'(?i)PM', text):
        pm_delta = 12
    else:
        pm_delta = 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    named_tz = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if named_tz:
        text = text[:-len(named_tz.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nanos = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if nanos:
        text = nanos.group(1)

    for fmt in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(text, fmt) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    # Last resort: RFC 2822 style parsing.
    parsed = email.utils.parsedate_tz(text)
    if parsed:
        return calendar.timegm(parsed) + pm_delta * 3600
    return None
2990
2991
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from ``url``, falling back to ``default_ext``."""
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3003
3004
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return ``filename`` with its extension swapped for ``<lang>.<format>``."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3007
3008
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')
    # A bad approximation?  Months count as 30 days, years as 365.
    approx_days = {'month': 30, 'year': 365}
    if unit in approx_days:
        amount *= approx_days[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
3036
3037
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        # Anything that is not exactly eight digits is passed through.
        return date_str
    return '-'.join(parts.groups())
3046
3047
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # A missing bound falls back to the earliest / latest date.
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3077
3078
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding.
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3087
3088
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 / -12 are STD_OUTPUT_HANDLE / STD_ERROR_HANDLE in the Win32 API).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call.
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device, or
        # GetConsoleMode fails on it.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual
        # Plane, or len(s) when there is none.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters, stopping before a non-BMP one.
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written as two units.
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3162
3163
def write_string(s, out=None, encoding=None):
    """Write the text ``s`` to ``out`` (default: sys.stderr) and flush it.

    On Windows, when no explicit encoding is requested, the console API
    path is tried first.  Otherwise the text is encoded (undecodable
    characters ignored) for binary streams and Python 2, written to the
    underlying ``buffer`` when one exists, or written as text.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    expects_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if expects_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        charset = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(charset, 'ignore'))
    else:
        out.write(s)
    out.flush()
3184
3185
def bytes_to_intlist(bs):
    """Convert a byte string (or sequence of ints) to a list of ints."""
    if not bs:
        return []
    # Python 3 bytes already yield ints; Python 2 strings yield characters.
    if isinstance(bs[0], int):
        return list(bs)
    return list(map(ord, bs))
3193
3194
def intlist_to_bytes(xs):
    """Pack a sequence of ints (each one unsigned byte) into a byte string."""
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3199
3200
# Cross-platform file locking
# Exactly one pair of _lock_file(f, exclusive) / _unlock_file(f) is defined
# below: a LockFileEx based one on Windows, an fcntl.flock based one where
# fcntl imports, and otherwise stubs that raise IOError.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low / high DWORDs of the byte count passed to (Un)LockFileEx.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same pointer.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3274
3275
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened in the constructor; entering takes a shared lock
    for mode 'r' and an exclusive one otherwise, leaving unlocks and
    closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        wants_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, wants_exclusive)
        except IOError:
            # Do not leak the handle when the lock cannot be taken.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3305
3306
def get_filesystem_encoding():
    """Return the filesystem encoding, or 'utf-8' when Python reports none."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3310
3311
def shell_quote(args):
    """Join ``args`` into one shell-quoted, space separated command line."""
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
3321
3322
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Data already smuggled into the URL is merged into ``data`` (in place)
    # and the whole dict is re-encoded into the fragment.
    plain_url, already_smuggled = unsmuggle_url(url, {})
    data.update(already_smuggled)
    payload = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return plain_url + '#' + payload
3331
3332
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into ``(url, data)``.

    URLs without a smuggled payload are returned as ``(smug_url, default)``.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    # The payload lives after the last '#'.
    plain_url, payload = smug_url.rsplit('#', 1)
    encoded = compat_parse_qs(payload)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(encoded)
3340
3341
def format_bytes(bytes):
    """Format a byte count as a human readable string such as '1.50KiB'.

    ``None`` yields 'N/A'; strings are converted with float().  The unit
    is clamped to the range B..YiB, so fractional values below 1/1024 are
    reported in 'B' and values of 1024 YiB or more in 'YiB' instead of
    selecting a wrong suffix or raising IndexError.

    Raises ValueError for negative values (math.log domain error).
    """
    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the suffix table on both ends.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3354
3355
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of ``s`` using ``unit_table``.

    ``unit_table`` maps unit names to multipliers; a comma in the number
    is read as a decimal point.  Returns the product truncated to an int,
    or None when ``s`` does not start with a number and a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
3365
3366
def parse_filesize(s):
    """Parse a size such as '1.5 MiB' or '10kb' into a number of bytes.

    Returns None for ``None`` input or when no known unit is found.
    """
    if s is None:
        return None

    # (symbol letter, decimal prefix name, binary prefix name), in order of
    # increasing power.
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        lower = letter.lower()
        _UNIT_TABLE.update({
            letter + 'iB': binary,
            letter + 'B': decimal,
            lower + 'B': binary,
            letter + 'b': decimal,
            lower + 'b': decimal,
            decimal_name + 'bytes': decimal,
            binary_name + 'bytes': binary,
        })

    return lookup_unit_table(_UNIT_TABLE, s)
3436
3437
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for ``None`` input or when the text is not understood.
    """
    if s is None:
        return None

    text = s.strip()

    # Only digits and separators: treat it as a plain integer.
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    _UNIT_TABLE = dict.fromkeys(('k', 'K'), 1000)
    _UNIT_TABLE.update(dict.fromkeys(('m', 'M', 'kk', 'KK'), 1000 ** 2))

    return lookup_unit_table(_UNIT_TABLE, text)
3457
3458
def parse_resolution(s):
    """Extract width/height information from a free-form string.

    Recognises, in this order, ``<w>x<h>`` (separator ``x``, ``X`` or the
    multiplication sign ``×``), ``<h>p`` / ``<h>i`` and ``4k`` / ``8k``.
    Returns a dict with 'width' and/or 'height', or an empty dict when
    ``s`` is None or nothing matches.
    """
    if s is None:
        return {}

    # The third separator is U+00D7 MULTIPLICATION SIGN.
    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160 lines, 8k -> 4320 lines.
        return {'height': int(mobj.group(1)) * 540}

    return {}
3479
3480
def parse_bitrate(s):
    """Return the integer preceding 'kbps' in ``s``, or None."""
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
3487
3488
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    # Unknown languages fall back to the English names.
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3498
3499
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """
    # An abbreviation is the first three letters of the English name.
    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
3508
3509
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Ampersands that already start a named or numeric entity are left alone.
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
3516
3517
def setproctitle(title):
    """Best-effort: set the process name to ``title`` via libc's prctl().

    Silently does nothing on Jython, when ``libc.so.6`` cannot be loaded,
    or when the loaded library has no ``prctl``.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title, so the string handed to prctl is NUL-terminated.  Sizing
    # it to exactly len(title_bytes) left no room for the terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME in <linux/prctl.h>.
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3542
3543
def remove_start(s, start):
    """Return ``s`` without the prefix ``start``; ``None`` passes through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3546
3547
def remove_end(s, end):
    """Return ``s`` without the suffix ``end``; ``None`` passes through.

    An empty ``end`` leaves ``s`` unchanged (slicing with ``[:-0]`` would
    otherwise yield an empty string).
    """
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)] if end else s
3550
3551
def remove_quotes(s):
    """Strip one pair of matching single or double quotes around ``s``."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3559
3560
def url_basename(url):
    """Return the last segment of the URL's path (slashes trimmed)."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
3564
3565
def base_url(url):
    """Return the http(s) URL up to and including the last '/' before any
    query, fragment or '&'.

    Raises AttributeError when ``url`` does not have that shape.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3568
3569
def urljoin(base, path):
    """Join ``path`` onto ``base``, returning None for unusable input.

    Byte strings are decoded as UTF-8.  A ``path`` that already is an
    absolute or protocol-relative URL is returned as is; otherwise
    ``base`` must be an http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3583
3584
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
3588
3589
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3593
3594
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert ``v`` to ``int(v) * invscale // scale`` or return ``default``.

    With ``get_attr`` the named attribute of ``v`` is converted instead.
    ``None``, the empty string and unconvertible values give ``default``.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3607
3608
def str_or_none(v, default=None):
    """Return ``v`` converted with compat_str, or ``default`` for ``None``."""
    if v is None:
        return default
    return compat_str(v)
3611
3612
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and plus signs before converting.
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
3620
3621
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale / scale.

    default is returned when v is None or when the conversion raises
    ValueError or TypeError.
    """
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        result = default
    return result
3629
3630
def bool_or_none(v, default=None):
    """Return v if it is a genuine bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3633
3634
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace, or default if v is not text."""
    if not isinstance(v, compat_str):
        return default
    return v.strip()
3637
3638
def url_or_none(url):
    """Return the stripped url if it looks like a URL, else None.

    Accepted are non-empty text values that, once stripped, start with
    '<scheme>://' or with a protocol-relative '//'.
    """
    if not (url and isinstance(url, compat_str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
        return candidate
    return None
3644
3645
def parse_duration(s):
    """Parse a textual duration and return it in seconds as a float.

    Three notations are tried in turn:
      * colon separated, e.g. '12:00', '01:02:03.05' or '01:02:03:04'
        (the leftmost of four fields is days)
      * ISO 8601 like or spelled out units, e.g. 'PT1H0.040S', '3h11m53s',
        '3 hours 11 mins 53 secs'
      * a single, possibly fractional, hours or minutes value, e.g.
        '2.5 hours' or '87 Min.'
    A trailing 'Z' is tolerated by all of them.

    Returns None if s is not a string or cannot be parsed.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Years, months and weeks are matched but not captured, so they do
        # not contribute to the result
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    try:
        duration = 0
        if secs:
            duration += float(secs)
        if mins:
            duration += float(mins) * 60
        if hours:
            duration += float(hours) * 60 * 60
        if days:
            duration += float(days) * 24 * 60 * 60
        if ms:
            duration += float(ms)
    except ValueError:
        # The last pattern accepts any run of digits and dots, so input
        # like '1.2.3 hours' matches but is not a number
        return None
    return duration
3702
3703
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the existing extension of filename.

    If expected_real_ext is given and the actual extension differs from it,
    ext is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, real_ext)
3710
3711
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename with ext.

    If expected_real_ext is given and the actual extension differs from it,
    nothing is removed and ext is appended to the whole filename.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3717
3718
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False when launching the binary raises OSError. """
    try:
        proc = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Drain both pipes and wait for the process to finish
        proc.communicate()
    except OSError:
        return False
    return exe
3727
3728
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr output is passed to detect_exe_version
    together with version_re and unrecognized. """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = proc.communicate()[0]
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3746
3747
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the output of an executable.

    version_re defaults to a pattern matching 'version <something>'; its
    first group is returned. unrecognized is returned if nothing matches.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    if not mobj:
        return unrecognized
    return mobj.group(1)
3757
3758
class PagedList(object):
    """Common base of the paged lists; getslice() is supplied by subclasses."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
3763
3764
class OnDemandPagedList(PagedList):
    """Paged list fetching pages one at a time until a short page shows up.

    pagefunc(pagenum) returns an iterable with the entries of that page.
    With use_cache enabled every fetched page is kept in a dict keyed by
    page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        collected = []
        pagesize = self._pagesize
        for pagenum in itertools.count(start // pagesize):
            firstid = pagenum * pagesize
            nextfirstid = firstid + pagesize
            if start >= nextfirstid:
                continue

            page_results = self._cache.get(pagenum) if self._use_cache else None
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
                if self._use_cache:
                    # The complete page is cached, before any slicing below
                    self._cache[pagenum] = page_results

            # Offset of the first wanted entry within this page
            if firstid <= start < nextfirstid:
                startv = start % pagesize
            else:
                startv = 0

            # Offset just past the last wanted entry if the slice ends here
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            collected.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return collected
3815
3816
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known up front.

    pagefunc(pagenum) returns an iterable with the entries of that page;
    pagecount is the total number of pages and pagesize the number of
    entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        res = []
        start_page = start // self._pagesize
        # Never ask pagefunc for pages beyond the known page count, even if
        # the requested end lies past the last entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the beginning of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Entries still wanted; None means "everything up to the last page"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3844
3845
def uppercase_escape(s):
    """Replace every \\UXXXXXXXX escape sequence in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(mobj):
        # The decoder returns (text, consumed length)
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _unescape, s)
3852
3853
def lowercase_escape(s):
    """Replace every \\uXXXX escape sequence in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(mobj):
        # The decoder returns (text, consumed length)
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _unescape, s)
3860
3861
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 unicode text is encoded to UTF-8 bytes before quoting
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    # '%' and the URL punctuation characters below are left untouched
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3867
3868
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA encoded; every other component is percent-escaped
    netloc = parts.netloc.encode('idna').decode('ascii')
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        netloc=netloc, path=path, params=params,
        query=query, fragment=fragment)
    return escaped.geturl()
3879
3880
def read_batch_urls(batch_fd):
    """Read the URLs of a batch file and close it.

    batch_fd is an iterable of lines (text or UTF-8 encoded bytes). Each
    line has a leading byte order mark and surrounding whitespace removed.
    Empty lines and lines starting with '#', ';' or ']' are dropped.
    """
    # A UTF-8 BOM is U+FEFF once decoded correctly; the three character form
    # is what it looks like when the bytes were decoded as Latin-1.
    # str.strip() removes neither, hence the explicit handling.
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3895
3896
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments like urlencode() and return ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3899
3900
def update_url_query(url, query):
    """Return url with the parameters of the query dict merged into its query string.

    Parameters already present in url are overwritten by entries of query
    with the same name. url is returned unchanged if query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    # doseq=True: list values are expanded into repeated parameters
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
3909
3910
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with selected parts replaced.

    url:     replaces the URL of req if given
    data:    replaces the body of req if truthy
    headers: merged over the headers of req
    query:   merged into the query string of the resulting URL

    The new request keeps the HTTP method of req (HEAD and PUT map to the
    dedicated request classes) as well as its origin_req_host, unverifiable
    flag and, when present, its timeout.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    special_types = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }
    req_type = special_types.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3929
3930
3931 def _multipart_encode_impl(data, boundary):
3932 content_type = 'multipart/form-data; boundary=%s' % boundary
3933
3934 out = b''
3935 for k, v in data.items():
3936 out += b'--' + boundary.encode('ascii') + b'\r\n'
3937 if isinstance(k, compat_str):
3938 k = k.encode('utf-8')
3939 if isinstance(v, compat_str):
3940 v = v.encode('utf-8')
3941 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3942 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3943 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3944 if boundary.encode('ascii') in content:
3945 raise ValueError('Boundary overlaps with data')
3946 out += content
3947
3948 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3949
3950 return out, content_type
3951
3952
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). A ValueError
    is raised only when an explicitly specified boundary clashes with the
    data; random boundaries are regenerated until one fits.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A clash with a caller supplied boundary is the caller's problem
        out, content_type = _multipart_encode_impl(data, boundary)
        return out, content_type

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
        return out, content_type
3981
3982
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable one out of several keys, in d.

    With a list or tuple of keys the value of the first key that is present
    and not None is returned; falsy values are skipped as well unless
    skip_false_values is disabled. default is returned if nothing qualifies.
    With a single key this is a plain d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
3991
3992
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to src and return the first acceptable result.

    getter is a callable or a list/tuple of callables, tried in order. A
    getter raising AttributeError, KeyError, TypeError or IndexError is
    skipped, as is a result that is not an instance of expected_type (if
    given). None is returned when no getter delivers.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
    return None
4004
4005
def merge_dicts(*dicts):
    """Merge dicts giving precedence to the earlier ones.

    None values are ignored. A value already merged is only replaced when it
    is an empty string and the later value is a non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str) and not current)
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4018
4019
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding/errors if it is not one yet.

    Note that the default encoding is determined once, when the function is
    defined.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4022
4023
# Rating labels mapped to the numeric age limit that parse_age_limit()
# returns for them
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4031
4032
# 'TV-*' rating labels mapped to numeric age limits. parse_age_limit() builds
# its pattern from the part of each key following the 'TV-' prefix.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4041
4042
def parse_age_limit(s):
    """Convert an age rating to a numeric age limit.

    Accepted are plain ints from 0 to 21, strings of one or two digits with
    an optional trailing '+', the keys of US_RATINGS and the keys of
    TV_PARENTAL_GUIDELINES (the latter also with '_' or nothing in place of
    the '-'). None is returned for anything else.
    """
    # Exact type check on purpose: bool is a subclass of int
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    mobj = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4057
4058
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the data passed to the callback.

    Handles an optional 'window.' prefix, the 'cb && cb(...)' idiom, an
    optional trailing semicolon and trailing '//' comments. Input that does
    not look like JSONP is returned unchanged.
    """
    jsonp_re = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4067
4068
def js_to_json(code):
    """Convert a JavaScript object literal to JSON text.

    Quoted strings are re-quoted with double quotes, bare identifiers and
    numeric keys are quoted, hexadecimal and octal integers are converted to
    decimal, and comments as well as trailing commas are removed.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside string literals; other escapes are kept as is
    STRING_ESCAPES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_escape(mobj):
        seq = mobj.group(0)
        return STRING_ESCAPES.get(seq, seq)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith(('/*', '//')) or token == ',':
            # Comments and trailing commas are dropped
            return ""

        if token[0] in ("'", '"'):
            token = re.sub(r'(?s)\\.|"', fix_escape, token[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, token)
            if im:
                i = int(im.group(1), base)
                # A trailing ':' marks an object key, which must be a string
                return '"%d":' % i if token.endswith(':') else '%d' % i

        return '"%s"' % token

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
4108
4109
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position within quality_ids is the quality; unknown ids get -1
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
4118
4119
# %-style template with the named fields title, id and ext
# (presumably the default output filename template - confirm against its users)
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4121
4122
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Leave room for the ellipses so that the result is length characters long
    return s[:length - len(ELLIPSES)] + ELLIPSES
4131
4132
def version_tuple(v):
    """Split a version string at '.' and '-' into a tuple of ints.

    Raises ValueError when a component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
4135
4136
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    When version is empty, or either value cannot be parsed by
    version_tuple, the answer is the opposite of assume_new.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
4144
4145
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when this module was loaded by a zipimporter ...
    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    # ... or when the interpreter carries a 'frozen' attribute
    return hasattr(sys, 'frozen')
4151
4152
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4156
4157
def error_to_compat_str(err):
    """Return the message of the exception err as text."""
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
4165
4166
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few complete types are looked up first. Otherwise the subtype (the
    part after the last '/', without parameters, lower-cased) is translated
    through a table and returned unchanged if it is not listed there.
    None is passed through.
    """
    if mt is None:
        return None

    FULL_TYPE_EXTS = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    SUBTYPE_EXTS = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }

    ext = FULL_TYPE_EXTS.get(mt)
    if ext is not None:
        return ext

    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()

    return SUBTYPE_EXTS.get(subtype, subtype)
4202
4203
def parse_codecs(codecs_str):
    """Split a codecs string into a dict with the keys 'vcodec' and 'acodec'.

    The first recognized video and audio codec are used and a missing one
    is reported as 'none'. A warning is written to stderr for every
    unrecognized codec. If nothing is recognized, exactly two entries are
    taken as video and audio codec in that order; otherwise an empty dict
    is returned.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    splited_codecs = [part for part in stripped if part]

    vcodec, acodec = None, None
    for full_codec in splited_codecs:
        # Only the part before the first '.' identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in VIDEO_CODECS:
            if not vcodec:
                vcodec = full_codec
        elif codec in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(splited_codecs) == 2:
        return {
            'vcodec': splited_codecs[0],
            'acodec': splited_codecs[1],
        }
    return {}
4233
4234
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of url_handle.

    The filename of an 'attachment' Content-Disposition header is preferred;
    otherwise the Content-Type header is mapped with mimetype2ext.
    """
    header = url_handle.headers.get

    disposition = header('Content-Disposition')
    mobj = re.match(
        r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition) if disposition else None
    if mobj:
        ext = determine_ext(mobj.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(header('Content-Type'))
4247
4248
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI holding the bytes data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4251
4252
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4261
4262
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.
    Returns a match object (truthy) if the decoded text starts with '<'
    after optional whitespace, else None. """

    # UTF-32-LE must be tried before UTF-16-LE, whose BOM is a prefix of it
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a BOM the data is taken to be UTF-8
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    s = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', s)
4281
4282
def determine_protocol(info_dict):
    """Return the protocol used to download the format described by info_dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: its rtmp/mms/rtsp prefix, then its m3u8/f4m
    extension, and finally its URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4303
4304
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Widest cell of every column, measured on the text form of the values
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    # All columns but the last are left-aligned and padded
    padded_cells = ['%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]]
    format_str = ' '.join(padded_cells) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)
4311
4312
4313 def _match_one(filter_part, dct):
4314 COMPARISON_OPERATORS = {
4315 '<': operator.lt,
4316 '<=': operator.le,
4317 '>': operator.gt,
4318 '>=': operator.ge,
4319 '=': operator.eq,
4320 '!=': operator.ne,
4321 }
4322 operator_rex = re.compile(r'''(?x)\s*
4323 (?P<key>[a-z_]+)
4324 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4325 (?:
4326 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4327 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4328 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4329 )
4330 \s*$
4331 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4332 m = operator_rex.search(filter_part)
4333 if m:
4334 op = COMPARISON_OPERATORS[m.group('op')]
4335 actual_value = dct.get(m.group('key'))
4336 if (m.group('quotedstrval') is not None
4337 or m.group('strval') is not None
4338 # If the original field is a string and matching comparisonvalue is
4339 # a number we should respect the origin of the original field
4340 # and process comparison value as a string (see
4341 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4342 or actual_value is not None and m.group('intval') is not None
4343 and isinstance(actual_value, compat_str)):
4344 if m.group('op') not in ('=', '!='):
4345 raise ValueError(
4346 'Operator %s does not support string values!' % m.group('op'))
4347 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4348 quote = m.group('quote')
4349 if quote is not None:
4350 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4351 else:
4352 try:
4353 comparison_value = int(m.group('intval'))
4354 except ValueError:
4355 comparison_value = parse_filesize(m.group('intval'))
4356 if comparison_value is None:
4357 comparison_value = parse_filesize(m.group('intval') + 'B')
4358 if comparison_value is None:
4359 raise ValueError(
4360 'Invalid integer value %r in filter part %r' % (
4361 m.group('intval'), filter_part))
4362 if actual_value is None:
4363 return m.group('none_inclusive')
4364 return op(actual_value, comparison_value)
4365
4366 UNARY_OPERATORS = {
4367 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4368 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4369 }
4370 operator_rex = re.compile(r'''(?x)\s*
4371 (?P<op>%s)\s*(?P<key>[a-z_]+)
4372 \s*$
4373 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4374 m = operator_rex.search(filter_part)
4375 if m:
4376 op = UNARY_OPERATORS[m.group('op')]
4377 actual_value = dct.get(m.group('key'))
4378 return op(actual_value)
4379
4380 raise ValueError('Invalid filter part %r' % filter_part)
4381
4382
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The '&' separated parts must all pass; evaluation stops at the first
    # one that does not
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4388
4389
def match_filter_func(filter_str):
    """Return a function checking an info dict against filter_str.

    The returned function yields None when the info dict passes the filter
    and a message explaining the skip otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Name the video by its title, falling back to its id
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4398
4399
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds.

    Supported are offsets in seconds ('12.5' or '12.5s') and clock times
    ('HH:MM:SS', 'HH:MM:SS.fff' or 'HH:MM:SS:fff'). None is returned for
    empty or unsupported input.
    """
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        # The fraction may be separated by ':' instead of '.'
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4411
4412
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The value is rounded to whole milliseconds before it is split up.
    Truncating the fractional part instead turns values such as 4.35 (which
    is stored as 4.3499999...) into '...,349'.
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
4415
4416
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Every <p> element with a usable begin time and either an end time or a
    duration becomes one SRT cue. Supported styling attributes are rendered
    as <b>, <i>, <u> and <font> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # (current namespace, [legacy namespaces]) pairs: every legacy namespace
    # is replaced by the current one in the raw data before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are evaluated; everything else is ignored
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Expands 'prefix:name' expressions using the namespaces below
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled from the <style> elements below
    styles = {}
    # Style taken from the <body>/<div> elements, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        """Parser target rendering one <p> element as SRT text with markup."""
        _out = ''
        # NOTE(review): the two lists below are class attributes that are
        # mutated in place with append()/pop(), so all parser instances
        # created during one dfxp2srt() call share them (_out, in contrast,
        # is rebound per instance by '+='). Confirm that carrying this state
        # over from one paragraph to the next is intended.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            """Handle an opening tag: emit a newline for <br>, else open style tags."""
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest to highest: default style, referenced
                # style, inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties the innermost applied style already sets
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    # Record the effective style: the previous one overlaid
                    # with the style of this element
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            """Handle a closing tag: close the tags opened for this element."""
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                # NOTE(review): start() pushes an applied style whenever the
                # element has any style, but it is only popped here if the
                # element actually opened a tag - verify this asymmetry
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            """Append character data to the output."""
            self._out += data

        def close(self):
            """Return the rendered text without surrounding whitespace."""
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and feed it through a parser whose target
        # collects the rendered text
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    # Fall back to un-namespaced <p> elements
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Collect the styles. A style may reference a parent style that has not
    # been collected yet; in that case another pass is made.
    # NOTE(review): a parent id that is never defined keeps 'repeat' set on
    # every pass - confirm such input cannot occur
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # The styles referenced by <body> and <div> form the default style
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index is paired with every paragraph, so the numbering has gaps
    # where paragraphs are skipped below
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            # No usable end time: derive it from the duration
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4579
4580
def cli_option(params, command_option, param):
    """Build the command line arguments for an option taking a value.

    Returns [command_option, value] with the value of params[param]
    converted to text, or an empty list if the parameter is missing or None.
    Falsy values such as 0 are converted as well, so that the resulting list
    only ever contains strings.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, compat_str(value)]
4586
4587
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the command line arguments for a boolean option.

    The value of params[param] selects true_value or false_value. With a
    separator, option and value are joined into a single argument; without,
    they are returned as two arguments. A missing or None parameter yields
    an empty list.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4596
4597
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4601
4602
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra arguments stored in params[param].

    If the parameter is missing or None, default is returned. A list default
    is handed out as a copy, so that a caller extending the result cannot
    alter the default list shared by all calls.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
4609
4610
4611 class ISO639Utils(object):
4612 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4613 _lang_map = {
4614 'aa': 'aar',
4615 'ab': 'abk',
4616 'ae': 'ave',
4617 'af': 'afr',
4618 'ak': 'aka',
4619 'am': 'amh',
4620 'an': 'arg',
4621 'ar': 'ara',
4622 'as': 'asm',
4623 'av': 'ava',
4624 'ay': 'aym',
4625 'az': 'aze',
4626 'ba': 'bak',
4627 'be': 'bel',
4628 'bg': 'bul',
4629 'bh': 'bih',
4630 'bi': 'bis',
4631 'bm': 'bam',
4632 'bn': 'ben',
4633 'bo': 'bod',
4634 'br': 'bre',
4635 'bs': 'bos',
4636 'ca': 'cat',
4637 'ce': 'che',
4638 'ch': 'cha',
4639 'co': 'cos',
4640 'cr': 'cre',
4641 'cs': 'ces',
4642 'cu': 'chu',
4643 'cv': 'chv',
4644 'cy': 'cym',
4645 'da': 'dan',
4646 'de': 'deu',
4647 'dv': 'div',
4648 'dz': 'dzo',
4649 'ee': 'ewe',
4650 'el': 'ell',
4651 'en': 'eng',
4652 'eo': 'epo',
4653 'es': 'spa',
4654 'et': 'est',
4655 'eu': 'eus',
4656 'fa': 'fas',
4657 'ff': 'ful',
4658 'fi': 'fin',
4659 'fj': 'fij',
4660 'fo': 'fao',
4661 'fr': 'fra',
4662 'fy': 'fry',
4663 'ga': 'gle',
4664 'gd': 'gla',
4665 'gl': 'glg',
4666 'gn': 'grn',
4667 'gu': 'guj',
4668 'gv': 'glv',
4669 'ha': 'hau',
4670 'he': 'heb',
4671 'iw': 'heb', # Replaced by he in 1989 revision
4672 'hi': 'hin',
4673 'ho': 'hmo',
4674 'hr': 'hrv',
4675 'ht': 'hat',
4676 'hu': 'hun',
4677 'hy': 'hye',
4678 'hz': 'her',
4679 'ia': 'ina',
4680 'id': 'ind',
4681 'in': 'ind', # Replaced by id in 1989 revision
4682 'ie': 'ile',
4683 'ig': 'ibo',
4684 'ii': 'iii',
4685 'ik': 'ipk',
4686 'io': 'ido',
4687 'is': 'isl',
4688 'it': 'ita',
4689 'iu': 'iku',
4690 'ja': 'jpn',
4691 'jv': 'jav',
4692 'ka': 'kat',
4693 'kg': 'kon',
4694 'ki': 'kik',
4695 'kj': 'kua',
4696 'kk': 'kaz',
4697 'kl': 'kal',
4698 'km': 'khm',
4699 'kn': 'kan',
4700 'ko': 'kor',
4701 'kr': 'kau',
4702 'ks': 'kas',
4703 'ku': 'kur',
4704 'kv': 'kom',
4705 'kw': 'cor',
4706 'ky': 'kir',
4707 'la': 'lat',
4708 'lb': 'ltz',
4709 'lg': 'lug',
4710 'li': 'lim',
4711 'ln': 'lin',
4712 'lo': 'lao',
4713 'lt': 'lit',
4714 'lu': 'lub',
4715 'lv': 'lav',
4716 'mg': 'mlg',
4717 'mh': 'mah',
4718 'mi': 'mri',
4719 'mk': 'mkd',
4720 'ml': 'mal',
4721 'mn': 'mon',
4722 'mr': 'mar',
4723 'ms': 'msa',
4724 'mt': 'mlt',
4725 'my': 'mya',
4726 'na': 'nau',
4727 'nb': 'nob',
4728 'nd': 'nde',
4729 'ne': 'nep',
4730 'ng': 'ndo',
4731 'nl': 'nld',
4732 'nn': 'nno',
4733 'no': 'nor',
4734 'nr': 'nbl',
4735 'nv': 'nav',
4736 'ny': 'nya',
4737 'oc': 'oci',
4738 'oj': 'oji',
4739 'om': 'orm',
4740 'or': 'ori',
4741 'os': 'oss',
4742 'pa': 'pan',
4743 'pi': 'pli',
4744 'pl': 'pol',
4745 'ps': 'pus',
4746 'pt': 'por',
4747 'qu': 'que',
4748 'rm': 'roh',
4749 'rn': 'run',
4750 'ro': 'ron',
4751 'ru': 'rus',
4752 'rw': 'kin',
4753 'sa': 'san',
4754 'sc': 'srd',
4755 'sd': 'snd',
4756 'se': 'sme',
4757 'sg': 'sag',
4758 'si': 'sin',
4759 'sk': 'slk',
4760 'sl': 'slv',
4761 'sm': 'smo',
4762 'sn': 'sna',
4763 'so': 'som',
4764 'sq': 'sqi',
4765 'sr': 'srp',
4766 'ss': 'ssw',
4767 'st': 'sot',
4768 'su': 'sun',
4769 'sv': 'swe',
4770 'sw': 'swa',
4771 'ta': 'tam',
4772 'te': 'tel',
4773 'tg': 'tgk',
4774 'th': 'tha',
4775 'ti': 'tir',
4776 'tk': 'tuk',
4777 'tl': 'tgl',
4778 'tn': 'tsn',
4779 'to': 'ton',
4780 'tr': 'tur',
4781 'ts': 'tso',
4782 'tt': 'tat',
4783 'tw': 'twi',
4784 'ty': 'tah',
4785 'ug': 'uig',
4786 'uk': 'ukr',
4787 'ur': 'urd',
4788 'uz': 'uzb',
4789 've': 'ven',
4790 'vi': 'vie',
4791 'vo': 'vol',
4792 'wa': 'wln',
4793 'wo': 'wol',
4794 'xh': 'xho',
4795 'yi': 'yid',
4796 'ji': 'yid', # Replaced by yi in 1989 revision
4797 'yo': 'yor',
4798 'za': 'zha',
4799 'zh': 'zho',
4800 'zu': 'zul',
4801 }
4802
@classmethod
def short2long(cls, code):
    """Convert language code from ISO 639-1 to ISO 639-2/T.

    Only the first two characters of ``code`` are looked up, so a longer
    tag such as 'en-US' resolves through its leading language part.  The
    lookup is case-insensitive ('EN' and 'en' give the same result).

    Returns None when the two-letter prefix is not a known language code.
    """
    # The table keys are lower case; normalise so that upper- or mixed-case
    # input is not silently reported as unknown.
    return cls._lang_map.get(code[:2].lower())
4807
@classmethod
def long2short(cls, code):
    """Convert language code from ISO 639-2/T to ISO 639-1.

    Returns the first two-letter key of the language table whose value
    equals ``code``, or None when there is no such entry.
    """
    candidates = (
        two_letter
        for two_letter, three_letter in cls._lang_map.items()
        if three_letter == code)
    return next(candidates, None)
4814
4815
class ISO3166Utils(object):
    """Lookup of country names by two-letter ISO 3166-1 alpha-2 code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name.

        The lookup is case-insensitive (``code`` is upper-cased first).
        Returns None when the code is not in the table.
        """
        return cls._country_map.get(code.upper())
5074
5075
class GeoUtils(object):
    """Helpers for producing IPv4 addresses that fall inside a given block."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (dotted-quad string) from a block.

        ``code_or_block`` is either a two-character country code, which is
        looked up case-insensitively in the per-country table, or an IPv4
        block in CIDR notation ('a.b.c.d/prefixlen').

        Returns None when a two-character code has no entry in the table.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Bits that vary inside the block (all ones in the host part)
        host_mask = 0xffffffff >> int(preflen)
        addr_value = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Clear any host bits given in the block address so the whole block
        # is sampled even when the address is not the network address.
        addr_min = addr_value & (0xffffffff ^ host_mask)
        addr_max = addr_min | host_mask
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5334
5335
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden per request.

    A request carrying a 'Ytdl-request-proxy' header uses that header's
    value as its proxy; the header is removed before the request goes on.
    """

    def __init__(self, proxies=None):
        # Install default http/https openers first; they pass the
        # '__noproxy__' marker so that proxy_open() is still consulted (and
        # can honour a per-request override) when no proxy is configured.
        for scheme in ('http', 'https'):
            opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request header takes precedence over the configured proxy
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only record the SOCKS proxy on the request here; youtube-dl's
            # http/https handlers wrap the socket with SOCKS themselves.
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5359
5360
5361 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5362 # released into Public Domain
5363 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5364
def long_to_bytes(n, blocksize=0):
    """Convert an integer to a big-endian byte string.

    The result carries no leading zero bytes, except that values that are
    zero (or negative) yield a single zero byte.  If ``blocksize`` is given
    and greater than zero, the front of the result is padded with zero bytes
    so that its length is a multiple of ``blocksize``.
    """
    remaining = int(n)
    words = []
    # Peel off 32-bit words, least significant first
    while remaining > 0:
        words.append(compat_struct_pack('>I', remaining & 0xffffffff))
        remaining >>= 32
    words.reverse()

    # Drop the zero bytes the word-sized packing left in front; an empty
    # result (nothing was packed) becomes a single zero byte.
    packed = b''.join(words).lstrip(b'\000') or b'\000'

    if blocksize > 0:
        shortfall = -len(packed) % blocksize
        if shortfall:
            packed = shortfall * b'\000' + packed
    return packed
5393
5394
def bytes_to_long(s):
    """Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s

    value = 0
    for offset in range(0, len(s), 4):
        (word,) = compat_struct_unpack('>I', s[offset:offset + 4])
        value = (value << 32) + word
    return value
5410
5411
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # The bytes are reversed before being read as one hexadecimal number
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    return '%x' % pow(plaintext, exponent, modulus)
5427
5428
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as [0, 2] + padding + [0] + data, where the
    padding consists of random nonzero octets.

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be nonzero: the first zero octet after the [0, 2]
    # header marks where the padding ends and the data begins, so a zero in
    # the padding would make the message unparseable.
    # NOTE: `random` is not a cryptographically secure source.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
5442
5443
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer ``num`` as a base-``n`` string.

    ``table`` supplies the digit characters, least digit value first.  When
    it is omitted (or empty) the first ``n`` characters of
    0-9, a-z, A-Z are used, which limits ``n`` to 62.

    Raises ValueError if ``n`` exceeds the table length, if ``num`` is
    negative, or if ``n`` is smaller than 2 for a non-zero ``num``.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # With floor division a negative number never reaches zero, and a base
    # below 2 never shrinks the number: both would loop forever below.
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small, it must be at least 2' % n)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
5460
5461
def decode_packed_codes(code):
    """Expand packed code found in ``code`` back into its source text.

    The first match of PACKED_CODES_RE supplies the packed payload, the
    numeric base, the symbol count and the '|'-separated symbol list.  Every
    word of the payload is then replaced through a table mapping the base-n
    spelling of each index to its symbol (or to that spelling itself when
    the symbol is empty).
    """
    match = re.search(PACKED_CODES_RE, code)
    packed_payload, radix, remaining, words = match.groups()
    radix = int(radix)
    remaining = int(remaining)
    words = words.split('|')

    replacements = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        # An empty symbol means the token stands for itself
        replacements[token] = words[remaining] or token

    def expand(word_match):
        return replacements[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', expand, packed_payload)
5478
5479
def caesar(s, alphabet, shift):
    """Shift every character of ``s`` by ``shift`` places within ``alphabet``.

    Positions wrap around the end of the alphabet.  Characters that do not
    occur in ``alphabet`` are kept as they are.  A ``shift`` of 0 returns
    ``s`` unchanged.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(rotate, s))
5487
5488
def rot47(s):
    """Rotate ``s`` by 47 places over the 94 ASCII characters '!' to '~'."""
    rotation_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rotation_alphabet, 47)
5491
5492
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or double-quoted; the surrounding quotes of quoted
    values are removed.  When a key occurs more than once the last
    occurrence wins.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    parsed = {}
    for match in re.finditer(attribute_re, attrib):
        name, raw_value = match.group('key', 'val')
        parsed[name] = raw_value[1:-1] if raw_value.startswith('"') else raw_value
    return parsed
5500
5501
def urshift(val, n):
    """Shift ``val`` right by ``n`` bits, adding 2**32 to negative values first."""
    if val < 0:
        val += 0x100000000
    return val >> n
5504
5505
5506 # Based on png2str() written by @gdkchan and improved by @yokrysty
5507 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into ``(width, height, pixels)``.

    ``pixels`` is a list of rows; each row is a flat list of ``width * 3``
    integer sample values.  The scanlines are always read as three bytes
    per pixel.

    Raises IOError when the signature or leading IHDR chunk is missing, or
    when the file contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, stream = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or stream[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        # Big-endian unsigned integer of 1, 2 or 4 bytes
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    def paeth(a, b, c):
        # Paeth predictor: whichever of left (a), up (b), upper-left (c) is
        # closest to a + b - c, ties resolved in that order
        p = a + b - c
        pa, pb, pc = abs(p - a), abs(p - b), abs(p - c)
        if pa <= pb and pa <= pc:
            return a
        if pb <= pc:
            return b
        return c

    # Each chunk is: 4-byte length, 4-byte type, data, 4-byte CRC (skipped)
    chunks = []
    while stream:
        size = read_uint(stream[:4])
        chunks.append({
            'type': stream[4:8],
            'length': size,
            'data': stream[8:8 + size],
        })
        stream = stream[8 + size + 4:]

    ihdr = chunks[0]['data']
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is prefixed by one filter-type byte
        row_start = y * (1 + stride)
        filter_type = raw[row_start]
        above = pixels[y - 1] if y > 0 else None

        row = []
        pixels.append(row)

        for x in range(stride):
            value = raw[1 + row_start + x]
            # Neighbours are the same sample of the pixel to the left
            # (3 bytes back) and of the pixel in the previous row
            has_left = x > 2
            left = row[x - 3] if has_left else 0
            up = above[x] if above is not None else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                if has_left and above is not None:
                    upper_left = above[x - 3]
                else:
                    upper_left = 0
                value = (value + paeth(left, up, upper_left)) & 0xff

            row.append(value)

    return width, height, pixels
5611
5612
def write_xattr(path, key, value):
    """Set the extended attribute ``key`` of the file ``path`` to ``value``.

    ``value`` is a byte string: it is written as-is to a binary stream on
    Windows and decoded as UTF-8 before being handed to a command-line tool.

    Backends are tried in this order:
      1. the importable ``xattr`` Python module (either pyxattr, detected by
         its ``set`` attribute, or the module exposing ``setxattr``);
      2. if that import fails: on Windows ('nt') an NTFS Alternate Data
         Stream named ``path:key``; elsewhere the ``setfattr`` executable,
         or else the ``xattr`` executable.

    Raises XAttrMetadataError when the chosen backend fails to write, and
    XAttrUnavailableError when pyxattr is older than 0.5.0 or no backend
    can be found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the wording of the message, nothing
                # here falls back to another implementation - the error is
                # simply raised (presumably it is not an ImportError subclass
                # and so is not handled below; confirm).
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            # Report OS-level failures through the xattr-specific error type
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module available: use platform-specific means
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # (':' separates the file name from the stream name, hence it
            # must not occur in the key)
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command-line argument, so it is
                # turned into text first
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported with the tool's stderr
                # output as the message
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5695
5696
def random_birthday(year_field, month_field, day_field):
    """Return a random birth date as a dict of string fields.

    The date is drawn uniformly from 1950-01-01 to 1995-12-31 inclusive.
    The three arguments name the dict keys under which the year, month and
    day (each converted with str()) are stored.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    day_span = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, day_span))
    field_names = (year_field, month_field, day_field)
    field_values = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, field_values)))