]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
New upstream version 2020.05.08
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_unquote_plus,
64 compat_urllib_request,
65 compat_urlparse,
66 compat_xpath,
67 )
68
69 from .socks import (
70 ProxyType,
71 sockssocket,
72 )
73
74
def register_socks_protocols():
    """Teach urlparse to treat SOCKS proxy schemes as netloc-carrying URLs.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly.  "Registering" each
    SOCKS scheme here (idempotently) works around that.
    """
    registry = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in registry:
            registry.append(scheme)
82
83
# This is not clearly defined otherwise
# (the type of a compiled regular expression pattern is obtained at runtime
# because it is not exposed under one stable public name across all Python
# versions this file supports)
compiled_regex_type = type(re.compile(''))
86
87
def random_user_agent():
    """Return a Chrome-on-Windows User-Agent string with a randomly chosen
    Chrome version number, so that outgoing HTTP requests resemble those of
    an ordinary desktop browser.
    """
    # %s is substituted with one of the Chrome versions below.
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Real Chrome build versions (roughly 68.x–76.x), newest first; any one
    # of them produces a plausible-looking User-Agent string.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
# Default headers attached to every outgoing HTTP request (see
# YoutubeDLHandler.http_request).  The User-Agent is randomized once per
# process, at import time.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1678
1679
# Alternative User-Agent strings for call sites that need a non-default UA
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel that distinguishes "caller supplied no default" from an explicit
# default of None (see xpath_element() and friends below)
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1690
# Localized month names keyed by language code ('en', 'fr', ...)
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions treated as known media container/codec formats
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII replacement (single letter or
# digraph such as 'AE'/'ss'); consumed by sanitize_filename(restricted=True)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1717
# strptime() patterns tried in order when parsing free-form date strings
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Additional patterns for day-first (European-style) d/m/y dates
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional patterns for month-first (US-style) m/d/y dates
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of a P.A.C.K.E.R.-style packed JavaScript blob
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of an application/ld+json <script> element
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1778
1779
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Smoke-test the codec: some broken locales report an unusable name
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'

    return encoding
1793
1794
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # NOTE(review): both lambdas ignore their parameter and close over fn;
        # they are only ever called with fn below, so behavior is unaffected
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a sibling temp file in the same directory so the final
    # os.rename() below stays on one filesystem (atomic on POSIX)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file, then re-raise the original error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1847
1848
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Manual scan for Python 2.6, where ElementPath does not support
        # attribute predicates
        for elem in node.findall(compat_xpath(xpath)):
            if key not in elem.attrib:
                continue
            if val is None or elem.attrib.get(key) == val:
                return elem
        return None
1863
1864 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1865 # the namespace parameter
1866
1867
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' form using the
    given prefix -> namespace-URI mapping."""
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1878
1879
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a string, or a list of
    candidate xpaths tried in order).

    When nothing matches: returns `default` if one was supplied, raises
    ExtractorError if fatal, otherwise returns None.
    """
    if isinstance(xpath, (str, compat_str)):
        found = node.find(compat_xpath(xpath))
    else:
        for candidate in xpath:
            found = node.find(compat_xpath(candidate))
            if found is not None:
                break

    if found is not None:
        return found
    # An explicit default wins over fatal
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1901
1902
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Pass through a miss (None) or the substituted default unchanged
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    # Element exists but carries no text: honour default/fatal again
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1916
1917
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matched by xpath, honouring
    default/fatal the same way as xpath_element()."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1929
1930
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper over the generic attribute lookup
    return get_element_by_attribute('id', id, html)
1934
1935
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1940
1941
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying the given attribute/value,
    or None when nothing matches."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1945
1946
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Class attributes are whitespace-separated lists, so match the class
    # name as a whole word anywhere within the attribute value
    class_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_re, html, escape_value=False)
1952
1953
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    tag_re = re.compile(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value))

    results = []
    for match in tag_re.finditer(html):
        content = match.group('content')

        # Strip one level of surrounding quotes if present
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1977
1978
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        # Replaced wholesale by the first start tag that is fed in
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is a list of (name, value) pairs; later duplicates win,
        # matching dict() construction semantics
        collected = {}
        for attr_name, attr_value in attrs:
            collected[attr_name] = attr_value
        self.attrs = collected
1987
1988
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise HTMLParseError on malformed input; return
        # whatever attributes were gathered before the failure
        pass
    return parser.attrs
2013
2014
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Translate <br> and </p><p> boundaries into newlines, discarding the
    # original line breaks
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Drop all remaining tags, then resolve entities
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2030
2031
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means stdout; on Windows put stdout into binary mode so
            # media data is not mangled by newline translation
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors won't be fixed by renaming, so re-raise directly
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2062
2063
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a valid RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
2071
2072
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Transliterate accents to ASCII in restricted mode
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # '?' and control characters are never allowed
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    # Handle timestamps: 12:34:56 becomes 12_34_56 rather than '12 -34 -56'
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in s)
    if not is_id:
        # Collapse runs of underscores and trim them from both ends
        result = re.sub(r'_{2,}', '_', result).strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2112
2113
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # Only Windows needs this treatment
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        # splitdrive() only learned UNC paths in Python 2.7
        drive_or_unc, _ = os.path.splitunc(s)
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    sanitized = []
    for part in parts:
        if part in ('.', '..'):
            sanitized.append(part)
        else:
            # Replace characters forbidden on Windows, plus a trailing
            # dot/space which Windows silently strips
            sanitized.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized)
2130
2131
def sanitize_url(url):
    """Fix obviously broken URLs before a download is attempted."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        fixed = re.sub(mistake, fixup, url)
        if fixed != url:
            return fixed
    return url
2148
2149
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after running the URL through sanitize_url()."""
    cleaned_url = sanitize_url(url)
    return compat_urllib_request.Request(cleaned_url, *args, **kwargs)
2152
2153
def expand_path(s):
    """Expand shell variables and ~"""
    # ~ first, then $VARS
    expanded = compat_expanduser(s)
    return os.path.expandvars(expanded)
2157
2158
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # A list (rather than a set) keeps unhashable elements working and
    # preserves first-seen order
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2166
2167
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if codepoint is not None:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: decimal (&#65;) or hexadecimal (&#x41;)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # Malformed numeric references occur in the wild; fall through to
        # the literal representation in that case
        # (see https://github.com/ytdl-org/youtube-dl/issues/7518)
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2197
2198
def unescapeHTML(s):
    """Resolve HTML entities in s; None passes through unchanged."""
    if s is None:
        return None
    assert type(s) == compat_str

    def transform(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', transform, s)
2206
2207
def get_subprocess_encoding():
    """Return the text encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere the filesystem encoding doubles as the subprocess encoding
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2218
2219
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    uses_win_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    if uses_win_unicode_api:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2242
2243
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): turn a byte filename back into text."""
    # Python 3 values and anything that is not bytes pass through unchanged
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
2253
2254
def encodeArgument(s):
    """Encode a command-line argument for handing to a subprocess."""
    if isinstance(s, compat_str):
        arg = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        arg = s.decode('ascii')
    return encodeFilename(arg, True)
2262
2263
def decodeArgument(b):
    # Arguments get the same treatment as subprocess-bound filenames
    return decodeFilename(b, for_subprocess=True)
2266
2267
def decodeOption(optval):
    """Decode a command-line option value to text; None passes through."""
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
2276
2277
def formatSeconds(secs):
    """Format a duration in seconds as display text.

    An hour or more renders as H:MM:SS, a minute or more as M:SS and
    anything shorter as plain seconds.
    """
    if secs >= 3600:
        # Was `secs > 3600`, which formatted exactly one hour as '60:00'
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        # Was `secs > 60`, which formatted exactly one minute as '60'
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2285
2286
def make_HTTPS_handler(params, **kwargs):
    # Build an HTTPS handler honouring the 'nocheckcertificate' option while
    # coping with ssl-module API differences across supported Python versions.
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # Ancient Pythons: HTTPSHandler takes no SSL context at all
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        # Build a context by hand; certificate checking on unless disabled
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2310
2311
def bug_reports_message():
    # Boilerplate appended to messages of errors that look like youtube-dl bugs
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return (
        '; please report this issue on https://yt-dl.org/bug .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        % update_cmd)
2321
2322
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    All exceptions defined below derive from this class, so callers can
    catch everything youtube-dl raises with a single except clause.
    """
    pass
2326
2327
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-level failures are always treated as expected (not bugs)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        message = msg if video_id is None else video_id + ': ' + msg
        if cause:
            message += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report boilerplate appended
            message += bug_reports_message()
        super(ExtractorError, self).__init__(message)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # None when no traceback was recorded at construction time
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2355
2356
class UnsupportedError(ExtractorError):
    # Raised when no extractor can handle the given URL
    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2362
2363
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2367
2368
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None):
        # countries: optional country codes supplied by the extractor
        self.msg = msg
        self.countries = countries
        super(GeoRestrictedError, self).__init__(msg, expected=True)
2379
2380
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2393
2394
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2402
2403
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # Keep the raw message accessible alongside the Exception args
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2414
2415
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2419
2420
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2428
2429
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a download yields fewer bytes
    than the size the server announced, indicating the connection was
    probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both counts are in bytes
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
2445
2446
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails.

    The constructor classifies the failure into self.reason:
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                # 'exceeded' was previously misspelt 'excedded', so this
                # message fallback could never match the real EDQUOT
                # strerror text 'Disk quota exceeded'
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2461
2462
class XAttrUnavailableError(YoutubeDLError):
    # Signals that extended-attribute support is unavailable
    # (presumably raised by the xattr post-processor — confirm at call sites)
    pass
2465
2466
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    # Construct an HTTP(S) connection, optionally bound to the configured
    # source address, with several Python-2 workarounds.
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family from the shape of the source address:
            # a dot implies IPv4, otherwise IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: monkey-patch connect() instead
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2530
2531
def handle_youtubedl_headers(headers):
    """Resolve internal Youtubedl-* pseudo-headers.

    'Youtubedl-no-compression' is not a real HTTP header: it instructs this
    layer to strip Accept-Encoding from the request before it goes out.
    """
    if 'Youtubedl-no-compression' not in headers:
        # Nothing to resolve: hand back the original mapping untouched
        return headers

    # Copy everything except Accept-Encoding (case-insensitive), then drop
    # the internal marker itself
    filtered = {}
    for key, value in headers.items():
        if key.lower() != 'accept-encoding':
            filtered[key] = value
    del filtered['Youtubedl-no-compression']
    return filtered
2540
2541
2542 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2543 """Handler for HTTP requests and responses.
2544
2545 This class, when installed with an OpenerDirector, automatically adds
2546 the standard headers to every HTTP request and handles gzipped and
2547 deflated responses from web servers. If compression is to be avoided in
2548 a particular request, the original request in the program code only has
2549 to include the HTTP header "Youtubedl-no-compression", which will be
2550 removed before making the real request.
2551
2552 Part of this code was copied from:
2553
2554 http://techknack.net/python-urllib2-handlers/
2555
2556 Andrew Rowls, the author of that code, agreed to release it to the
2557 public domain.
2558 """
2559
2560 def __init__(self, params, *args, **kwargs):
2561 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2562 self._params = params
2563
2564 def http_open(self, req):
2565 conn_class = compat_http_client.HTTPConnection
2566
2567 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2568 if socks_proxy:
2569 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2570 del req.headers['Ytdl-socks-proxy']
2571
2572 return self.do_open(functools.partial(
2573 _create_http_connection, self, conn_class, False),
2574 req)
2575
    @staticmethod
    def deflate(data):
        # Decompress a deflate-encoded body; some servers send a raw stream
        # (no zlib header), others a zlib-wrapped one — try raw first
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)
2582
    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Fill in the standard headers without overriding caller-set ones
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Resolve internal pseudo-headers (e.g. Youtubedl-no-compression)
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req
2613
    def http_response(self, req, resp):
        """Postprocess a response: transparently decompress gzip/deflate
        payloads and percent-encode non-ASCII redirect Location headers."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes chopped off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp
2660
    # HTTPS traffic gets exactly the same pre/post-processing as HTTP
    https_request = http_request
    https_response = http_response
2663
2664
def make_socks_conn_class(base_class, socks_proxy):
    """Build an HTTP(S)Connection subclass that tunnels through a SOCKS proxy.

    base_class must be HTTPConnection or HTTPSConnection; socks_proxy is a URL
    such as 'socks5://user:pass@host:port'.  Returns a subclass whose connect()
    establishes the TCP (and, for HTTPS, TLS) connection through the proxy.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously an unknown scheme left socks_type unbound and crashed
        # later with a confusing NameError; fail fast with a clear message.
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Credentials in the proxy URL may be percent-encoded
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2706
2707
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and per-request
    SOCKS proxying via the internal 'Ytdl-socks-proxy' header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        # https_conn_class lets callers inject a preconfigured
        # HTTPSConnection subclass; params is stored for later use
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        # Forward the handler's SSL context / hostname checking settings
        # to the connection factory, where the Python version provides them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Tunnel through a SOCKS proxy and drop the internal header
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2731
2732
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix curl/cookies.txt uses to mark HttpOnly cookies
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.

'''
    # Parsed representation of a single cookies.txt line
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate a cookies.txt line and strip the HttpOnly prefix so
            # MozillaCookieJar can parse it; raises LoadError on bad entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip broken entries instead of failing the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2849
2850
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Route HTTPS requests/responses through the same cookie handling as HTTP
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2873
2874
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that works around Python 2 handing back byte-string URLs."""
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2882
2883
def extract_timezone(date_str):
    """Split a trailing timezone designator ('Z' or '+HH:MM'/'-HHMM') off date_str.

    Returns (utc_offset_timedelta, date_str_without_timezone).
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        # No recognizable timezone suffix: assume UTC, keep the string as-is
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if sign is None:
        # Bare 'Z' designator means UTC
        return datetime.timedelta(), date_str
    factor = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(m.group('hours')),
        minutes=factor * int(m.group('minutes')))
    return offset, date_str
2900
2901
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime cannot handle fractional seconds; drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        parsed = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        return None
    return calendar.timegm(parsed.timetuple())
2919
2920
def date_formats(day_first=True):
    """Return the tuple of strptime patterns to try, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2923
2924
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    # Strip any trailing UTC-offset designator; the offset itself is discarded
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates (e.g. 'Fri, 08 May 2020 00:00:00')
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
2951
2952
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker means 12 hours must be added after parsing
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822 parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
2984
2985
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media file extension from a URL, falling back to default_ext."""
    if url is None or '.' not in url:
        return default_ext
    # Whatever follows the last '.' once the query string is removed
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
2997
2998
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    # Build the subtitle file name by injecting '<lang>.<format>' as the extension
    return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3001
3002
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    m = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if m is None:
        # Plain YYYYMMDD date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(m.group('time'))
    if m.group('sign') == '-':
        amount = -amount
    unit = m.group('unit')
    # Months and years are approximated as 30 and 365 days respectively
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3030
3031
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that is not exactly eight digits is passed through untouched
    return '-'.join(m.groups()) if m is not None else date_str
3040
3041
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest possible date range
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3071
3072
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Python 2 may return the platform string as bytes
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3081
3082
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors to GetStdHandle ids:
    # 1 (stdout) -> STD_OUTPUT_HANDLE (-11), 2 (stderr) -> STD_ERROR_HANDLE (-12)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A real console is a (non-remote) character device on which
        # GetConsoleMode succeeds; anything else gets normal stream writes
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        # (such characters occupy two UTF-16 code units in WriteConsoleW)
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3156
3157
def write_string(s, out=None, encoding=None):
    """Write the unicode string s to out (default: sys.stderr), coping with
    consoles and byte streams that cannot accept unicode directly."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Windows wide-character console API when available
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3178
3179
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3 but 1-char strings on Python 2
    return list(bs) if isinstance(bs[0], int) else [ord(ch) for ch in bs]
3187
3188
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a byte string."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
3193
3194
# Cross-platform file locking: define _lock_file/_unlock_file per platform
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirror of the Win32 OVERLAPPED struct required by LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the whole file (low/high dwords)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the entire file; 0x2 is LOCKFILE_EXCLUSIVE_LOCK
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Must only be called on a file previously locked by _lock_file
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            # flock: exclusive (write) or shared (read) advisory lock
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3268
3269
class locked_file(object):
    """Context manager wrapping an open file with an advisory inter-process
    lock: shared for read mode, exclusive for write/append modes."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Locking failed: don't leak the open file handle
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Always close, even if unlocking raised
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3299
3300
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to UTF-8 when unset."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        # May be None on some Python 2 setups without a usable locale
        return 'utf-8'
    return encoding
3304
3305
def shell_quote(args):
    """Return a single shell-safe command line built from a list of arguments."""
    fs_encoding = get_filesystem_encoding()
    quoted = []
    for arg in args:
        if isinstance(arg, bytes):
            # We may get a filename encoded with 'encodeFilename'
            arg = arg.decode(fs_encoding)
        quoted.append(compat_shlex_quote(arg))
    return ' '.join(quoted)
3315
3316
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL fragment
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
3325
3326
def unsmuggle_url(smug_url, default=None):
    """Extract data added by smuggle_url(); returns (clean_url, data_or_default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    data = json.loads(compat_parse_qs(sdata)['__youtubedl_smuggle'][0])
    return url, data
3334
3335
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.50KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # Pick the largest binary unit that keeps the mantissa >= 1
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3348
3349
def lookup_unit_table(unit_table, s):
    """Parse a '<number> <unit>' string via unit_table; return an int or None."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # A decimal comma is accepted as separator
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3359
3360
def parse_filesize(s):
    """Parse a human-readable file size like '5.5 MiB' or '1GB' into bytes (int), or None."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3430
3431
def parse_count(s):
    """Parse view/like counts like '1.2M', '15k' or '1,000' into an int, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain number, possibly with thousands separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
3451
3452
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4K'.

    Returns a dict with 'width'/'height' keys (whichever can be determined)
    or an empty dict.
    """
    if s is None:
        return {}

    # '\xd7' is the multiplication sign '×'; the previous pattern contained a
    # mojibake'd variant of it, which never matched real '1920×1080' strings.
    mobj = re.search(r'\b(?P<w>\d+)\s*[xX\xd7]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4K -> 2160, 8K -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
3473
3474
def parse_bitrate(s):
    """Extract a bitrate in KBit/s from strings like '500 kbps'; None otherwise."""
    if not isinstance(s, compat_str):
        return
    m = re.search(r'\b(\d+)\s*kbps', s)
    return int(m.group(1)) if m else None
3481
3482
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Fall back to English when the requested language is unknown
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return names.index(name) + 1
    except ValueError:
        return None
3492
3493
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3502
3503
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' with '&amp;' in an XML string."""
    # Leave '&' alone when it already starts a known entity
    # or a numeric/hex character reference
    bare_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_amp, '&amp;', xml_str)
3510
3511
def setproctitle(title):
    """Best-effort: set the process name shown by tools like ps (glibc only)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME (see prctl(2))
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3536
3537
def remove_start(s, start):
    """Strip prefix start from s when present; None passes through unchanged."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3540
3541
def remove_end(s, end):
    """Strip suffix end from s when present; None passes through unchanged.

    Guards against an empty end: previously s.endswith('') was True and
    s[:-0] evaluated to s[:0], wrongly returning an empty string.
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3544
3545
def remove_quotes(s):
    """Strip one matching pair of surrounding quotes (single or double) from s."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3553
3554
def url_basename(url):
    """Return the last path component of a URL, e.g. '.../a/b.mp4?x=1' -> 'b.mp4'."""
    parsed_path = compat_urlparse.urlparse(url).path
    return parsed_path.strip('/').split('/')[-1]
3558
3559
def base_url(url):
    """Return the URL up to and including the last '/' of its path."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3562
3563
def urljoin(base, path):
    """Join base and path into a full URL; return None when either part is
    unusable (empty path, non-string input, base not http(s) or protocol-relative)."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # path is already an absolute (or protocol-relative) URL
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3577
3578
class HEADRequest(compat_urllib_request.Request):
    # Request subclass forcing the HTTP verb to HEAD
    def get_method(self):
        return 'HEAD'
3582
3583
class PUTRequest(compat_urllib_request.Request):
    # Request subclass forcing the HTTP verb to PUT
    def get_method(self):
        return 'PUT'
3587
3588
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int (optionally reading attribute get_attr first and
    rescaling by invscale/scale); return default when conversion fails."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3601
3602
def str_or_none(v, default=None):
    """Convert v to a string, or return default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3605
3606
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        # Already an integer: nothing to parse
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators ('.', ',') and '+' signs before parsing
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3614
3615
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float and rescale by invscale/scale; default on failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3623
3624
def bool_or_none(v, default=None):
    """Return v only if it is a genuine bool; otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3627
3628
def strip_or_none(v, default=None):
    """Return v.strip() when v is a string, else default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3631
3632
def url_or_none(url):
    """Return the stripped url if it looks like an absolute or
    protocol-relative URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url):
        return url
    return None
3638
3639
def parse_duration(s):
    """Parse a duration string (e.g. '1:23:45', '3 days 4 hours', 'PT1H30M')
    into a number of seconds (float), or return None if it cannot be parsed."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # First try plain [[[DD:]HH:]MM:]SS[.ms] colon notation.
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Then try a spelled-out / ISO 8601-like form ('2h30m', 'PT1H30M',
        # '3 days'); years, months and weeks are matched but discarded.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Finally a loose '1.5 hours' / '90 min' style.
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms is captured including the leading dot, e.g. '.123'.
        duration += float(ms)
    return duration
3696
3697
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext before filename's real extension ('a.mp4' -> 'a.ext.mp4').

    If expected_real_ext is given and the actual extension differs, ext is
    appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3704
3705
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension with ext.

    If expected_real_ext is given and does not match the actual extension,
    ext is appended after the whole filename instead of replacing it.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = name
    return '{0}.{1}'.format(base, ext)
3711
3712
def check_executable(exe, args=[]):
    """Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)."""
    try:
        subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
3721
3722
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """Return the version of the specified executable,
    or False if the executable is not present."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = process.communicate()[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3740
3741
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program output using version_re,
    returning `unrecognized` when nothing matches."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3751
3752
class PagedList(object):
    """Base class for lazily paged result lists; subclasses implement getslice()."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3757
3758
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc, optionally caching
    each fetched page."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        entries = []
        for page_idx in itertools.count(start // self._pagesize):
            first = page_idx * self._pagesize
            next_first = first + self._pagesize
            if start >= next_first:
                # The whole page lies before the requested slice.
                continue

            page = self._cache.get(page_idx) if self._use_cache else None
            if page is None:
                page = list(self._pagefunc(page_idx))
                if self._use_cache:
                    self._cache[page_idx] = page

            cut_from = (
                start % self._pagesize
                if first <= start < next_first
                else 0)
            cut_to = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and first <= end <= next_first)
                else None)

            if cut_from != 0 or cut_to is not None:
                page = page[cut_from:cut_to]
            entries.extend(page)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page) + cut_from < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_first:
                break
        return entries
3809
3810
class InAdvancePagedList(PagedList):
    """PagedList whose total number of pages is known in advance."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc    # callable: page number -> iterable of entries
        self._pagecount = pagecount  # total number of available pages
        self._pagesize = pagesize    # number of entries per page

    def getslice(self, start=0, end=None):
        """Return entries [start:end), fetching only the pages involved."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading entries of the first page that precede `start`.
        skip_elems = start - start_page * self._pagesize
        # Remaining number of wanted entries (None = unbounded).
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None  # only the first fetched page needs trimming
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the rest of the request.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3838
3839
def uppercase_escape(s):
    """Decode \\UXXXXXXXX escape sequences embedded in s."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
3846
3847
def lowercase_escape(s):
    """Decode \\uXXXX escape sequences embedded in s."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode, s)
3854
3855
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986."""
    if isinstance(s, compat_str) and sys.version_info < (3, 0):
        s = s.encode('utf-8')
    # Characters in the safe list are left untouched.
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3861
3862
def escape_url(url):
    """Escape URL as suggested by RFC 3986 (IDNA-encode the host,
    percent-escape the other components)."""
    parts = compat_urllib_parse_urlparse(url)
    replacements = {
        'netloc': parts.netloc.encode('idna').decode('ascii'),
        'path': escape_rfc3986(parts.path),
        'params': escape_rfc3986(parts.params),
        'query': escape_rfc3986(parts.query),
        'fragment': escape_rfc3986(parts.fragment),
    }
    return parts._replace(**replacements).geturl()
3873
3874
def read_batch_urls(batch_fd):
    """Read a batch file object and return its list of cleaned URLs.

    Strips a UTF-8 BOM and whitespace, and skips comment lines starting
    with '#', ';' or ']'. Closes batch_fd when done.
    """
    def _clean(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        bom = '\xef\xbb\xbf'
        if line.startswith(bom):
            line = line[len(bom):]
        line = line.strip()
        return False if line.startswith(('#', ';', ']')) else line

    with contextlib.closing(batch_fd) as fd:
        return [u for u in (_clean(line) for line in fd) if u]
3889
3890
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3893
3894
def update_url_query(url, query):
    """Return url with the key/value pairs from query merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed.query)
    merged.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged, True)))
3903
3904
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone req, optionally replacing its URL/data and merging extra
    headers/query parameters; the HTTP method is preserved."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3923
3924
def _multipart_encode_impl(data, boundary):
    """Build a multipart/form-data body from data with the given boundary.

    Raises ValueError when the boundary occurs inside a field, which would
    corrupt the encoding.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = (b'Content-Disposition: form-data; name="' + name
                + b'"\r\n\r\n' + value + b'\r\n')
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += b'--' + boundary_bytes + b'\r\n' + part

    out += b'--' + boundary_bytes + b'--\r\n'
    return out, content_type
3945
3946
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    boundary_was_given = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            # Returns (encoded_body, content_type).
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # Boundary collided with the payload: regenerate unless the
            # caller insisted on a specific one.
            if boundary_was_given:
                raise
            boundary = None
3975
3976
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up key_or_keys (a single key, or a list/tuple of keys) in d.

    Returns the first present value that is not None (and, unless
    skip_false_values is False, not falsy); otherwise default.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
3985
3986
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to src; return the first result that does
    not raise (and matches expected_type, when given), else None."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            value = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
3998
3999
def merge_dicts(*dicts):
    """Merge dicts left to right, ignoring None values; a later non-empty
    string may replace an earlier empty-string value for the same key."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
            elif (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4012
4013
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding bytes with encoding when needed.

    NOTE: the encoding default is evaluated once at import time.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4016
4017
# MPAA-style US movie rating -> minimum viewer age
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines rating -> minimum viewer age
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4035
4036
def parse_age_limit(s):
    """Parse an age limit from an int (0-21), an 'NN+' string, a US movie
    rating or a TV parental guideline; return an int age or None."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(
        r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4051
4052
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving only the JSON payload."""
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4061
4062
def js_to_json(code):
    """Convert a JavaScript object/value literal into parseable JSON text
    (quotes keys, normalizes string quoting, strips comments and trailing
    commas, converts hex/octal integers)."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, numeric base) pairs for integer object keys/values.
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token (string, identifier, comment, comma
        # or integer) into its JSON equivalent.
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas are dropped entirely.
            return ""

        if v[0] in ("'", '"'):
            # Re-escape the string body: keep escaped sequences, but turn
            # bare double quotes into \" and \x escapes into \u00.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # A trailing ':' means this was an object key.
                return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers become quoted strings.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4102
4103
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return rank
4112
4113
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4115
4116
def limit_length(s, length):
    """Truncate s to at most `length` characters, ending with '...' if cut."""
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
4125
4126
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(int(part) for part in parts)
4129
4130
def is_outdated_version(version, limit, assume_new=True):
    """Return True when version is older than limit; missing or unparseable
    versions are judged according to assume_new."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4138
4139
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):
        return True
    return isinstance(globals().get('__loader__'), zipimporter)
4145
4146
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
4150
4151
def error_to_compat_str(err):
    """Return str(err); on Python 2 the byte string is decoded with the
    preferred encoding rather than ascii."""
    err_str = str(err)
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4159
4160
def mimetype2ext(mt):
    """Map a MIME type to a file extension; returns None for None input,
    and the (lowercased) subtype itself when no mapping is known."""
    if mt is None:
        return None

    # Full-type special cases first.
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    # Otherwise map on the subtype, with any ';'-parameters stripped.
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4196
4197
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    Returns {} for empty input or when nothing could be classified; an
    unclassified two-entry list is assumed to be (video, audio).
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    codec_list = [
        c.strip()
        for c in codecs_str.strip().strip(',').split(',')
        if c.strip()]
    vcodec = acodec = None
    for full_codec in codec_list:
        base = full_codec.split('.')[0]
        if base in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            vcodec = vcodec or full_codec
        elif base in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(codec_list) == 2:
        return {
            'vcodec': codec_list[0],
            'acodec': codec_list[1],
        }
    return {}
4227
4228
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's Content-Disposition filename,
    falling back to its Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
4241
4242
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI for the given bytes and MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4245
4246
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone.
        return False
    return age_limit < content_limit
4255
4256
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    # HTML starts with an (optionally whitespace-preceded) '<'.
    return re.match(r'^\s*<', decoded)
4275
4276
def determine_protocol(info_dict):
    """Infer the download protocol for info_dict from its explicit 'protocol'
    field, its URL prefix, or its extension; fall back to the URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4297
4298
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    widths = [
        max(len(compat_str(value)) for value in column)
        for column in zip(*rows)]
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
4305
4306
def _match_one(filter_part, dct):
    """Evaluate a single --match-filter expression against dct.

    Supports comparisons ('key <op> value', with optional '?' to also match
    missing keys) and unary existence tests ('key' / '!key'). Raises
    ValueError on syntax errors or unsupported operator/value combinations.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind as the enclosing ones.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Accept human-readable sizes such as '500k' or '2.5MiB'.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # '?' after the operator means missing values also pass.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4375
4376
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
4382
4383
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None when the video passes
    the filter, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4392
4393
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.5s' or 'HH:MM:SS[.f|:ff]')
    into seconds; return None when it cannot be parsed."""
    if not time_expr:
        return None

    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, mins, secs = m.groups()
        # A ':' before the fraction (frame notation) is treated as '.'.
        return 3600 * int(hours) + 60 * int(mins) + float(secs.replace(':', '.'))
4405
4406
def srt_subtitles_timecode(seconds):
    """Format a seconds value as an SRT timecode (HH:MM:SS,mmm)."""
    hrs = seconds / 3600
    mins = (seconds % 3600) / 60
    secs = seconds % 60
    msecs = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, msecs)
4409
4410
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML namespace URIs that are rewritten to the current ones
    # before parsing.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that get translated into SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}         # style id -> resolved style property dict
    default_style = {}  # style inherited from the body/div elements

    class TTMLPElementParser(object):
        # NOTE(review): these are class-level attributes shared between
        # instances; appears safe because each parse_node() call pairs
        # start/end events, but worth confirming.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            # Open SRT markup for this element's effective style.
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            # Close the markup opened by the matching start() call.
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize one <p> element into styled SRT text.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces to the current ones before parsing.
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat while a parent style has not been
    # processed yet (styles may reference later-defined parents).
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style set on <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4573
4574
def cli_option(params, command_option, param):
    """Render params[param] as [command_option, value]; [] when it is None."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
4580
4581
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render the boolean params[param] as CLI arguments; [] when unset.

    With a separator the option is emitted as a single 'opt<sep>value' token.
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4590
4591
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4595
4596
def cli_configuration_args(params, param, default=[]):
    """Return the argument list stored at params[param], or default when
    the key is absent. The stored value must be a list."""
    args = params.get(param)
    if args is None:
        return default
    assert isinstance(args, list)
    return args
4603
4604
class ISO639Utils(object):
    """Conversions between ISO 639-1 (2-letter) and ISO 639-2/T (3-letter)
    language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are significant; unknown codes
        # yield None.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup; returns None implicitly when no mapping exists.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4808
4809
class ISO3166Utils(object):
    """Lookup of ISO 3166-1 alpha-2 country codes to English country names."""

    # Derived from http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'ƅland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'CĆ“te d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'CuraƧao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'RĆ©union',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint BarthƩlemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Resolve a two-letter country code (case-insensitive) to its full English name, or None if unknown."""
        normalized = code.upper()
        return cls._country_map.get(normalized)
5068
5069
class GeoUtils(object):
    """Helpers for faking a client IP address from a given country."""

    # Major IPv4 address blocks per country (CIDR notation)
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random dotted-quad IPv4 address inside the given block.

        code_or_block is either a two-letter country code (looked up in
        _country_ip_map; returns None when unknown) or an explicit CIDR
        block such as '1.2.3.0/24'.
        """
        # A two-character argument is treated as a country code; anything
        # else is assumed to already be a CIDR block.
        if len(code_or_block) == 2:
            cidr = cls._country_ip_map.get(code_or_block.upper())
            if not cidr:
                return None
        else:
            cidr = code_or_block
        base_addr, prefix_len = cidr.split('/')
        # Lowest address in the block, as a 32-bit integer.
        addr_lo = compat_struct_unpack('!L', socket.inet_aton(base_addr))[0]
        # Highest address: set every host bit below the prefix.
        addr_hi = addr_lo | (0xffffffff >> int(prefix_len))
        picked = random.randint(addr_lo, addr_hi)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
5328
5329
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets each request override the proxy to use
    via the internal 'Ytdl-request-proxy' header. SOCKS proxies are flagged
    for the http/https handlers instead of being opened here.
    """

    def __init__(self, proxies=None):
        # Set default handlers for http/https so that proxy_open is always
        # consulted, even when no proxy mapping was supplied.
        # NOTE: the default arguments (proxy/type/meth) deliberately bind the
        # current loop values inside each lambda, avoiding Python's
        # late-binding closure pitfall.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (internal header) takes precedence over the
        # handler-wide default; the header is stripped before the request
        # goes out on the wire.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        # Sentinel meaning "connect directly, no proxy".
        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's own http/https handlers perform the actual
            # wrapping of the socket with SOCKS, so nothing more to do here.
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5353
5354
5355 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5356 # released into Public Domain
5357 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5358
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, the front of the
    byte string is padded with binary zeros so that its length is a multiple
    of blocksize.
    """
    n = int(n)
    # Emit the integer as big-endian 32-bit words, most significant first.
    words = []
    while n > 0:
        words.insert(0, compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    s = b''.join(words)
    # Drop leading zero bytes; keep a single zero byte when n == 0
    # (the join above is empty in that case).
    s = s.lstrip(b'\000') or b'\000'
    # Left-pad with zero bytes up to a multiple of blocksize.
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5387
5388
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the length is a multiple of 4, then fold
    # the string into the accumulator one 32-bit word at a time.
    padding = -len(s) % 4
    s = b'\000' * padding + s
    acc = 0
    for offset in range(0, len(s), 4):
        acc = (acc << 32) | compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return acc
5404
5405
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # OHDave's JS implementation interprets the input as a little-endian
    # number, hence the reversal before converting from hex.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5421
5422
def pkcs1pad(data, length):
    """
    Pad input data with the PKCS#1 v1.5 scheme (EME-PKCS1-v1_5,
    RFC 8017 section 7.2.1): [0x00, 0x02, PS..., 0x00, data...]

    @param {int[]} data input data (list of byte values)
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError if data does not fit in length - 11 bytes
    """
    # At least 8 bytes of padding string plus the 3 framing bytes must fit.
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding string PS must consist of NON-ZERO octets (RFC 8017):
    # a zero byte would be mistaken for the separator that terminates the
    # padding, truncating the message on decryption.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5436
5437
def encode_base_n(num, n, table=None):
    """Return the non-negative integer *num* written in base *n*.

    table supplies the digit characters; when omitted, the first n
    characters of 0-9a-zA-Z are used (so n may be at most 62 then).
    Raises ValueError for an unusable base or a negative number (both of
    which previously caused an infinite loop).
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    # Guard against inputs for which the division loop below never
    # terminates: base < 2 (num // 1 == num) and negative numbers
    # (floor division never reaches 0).
    if n < 2:
        raise ValueError('base %d must be at least 2' % n)
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
5454
5455
def decode_packed_codes(code):
    """Deobfuscate JavaScript packed with Dean Edwards' p.a.c.k.e.r.

    The packed payload, base, symbol count and symbol list are extracted
    with PACKED_CODES_RE; every base-N encoded identifier in the payload is
    then substituted with its original symbol.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Build the identifier -> symbol table; an empty symbol means the
    # identifier stands for itself.
    symbol_table = {}
    for index in range(count):
        key = encode_base_n(index, base)
        symbol_table[key] = symbols[index] or key

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5472
5473
def caesar(s, alphabet, shift):
    """Apply a Caesar shift of *shift* positions over *alphabet* to each
    character of *s*; characters not in *alphabet* pass through unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            shifted.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            shifted.append(ch)
    return ''.join(shifted)
5481
5482
def rot47(s):
    """Apply the ROT47 cipher: rotate each of the 94 printable ASCII
    characters ('!' through '~') 47 positions forward; ROT47 is its own
    inverse.
    """
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5485
5486
def parse_m3u8_attributes(attrib):
    """Parse an HLS (RFC 8216) attribute list into a dict.

    *attrib* is a string of comma-separated KEY=VALUE pairs, as found after
    tags such as #EXT-X-STREAM-INF. Quoted values may contain commas and
    have their surrounding quotes stripped.
    """
    info = {}
    # "[^"]*" accepts the empty quoted-string (e.g. URI=""), which is valid
    # per RFC 8216 but was silently dropped by the previous "[^"]+" pattern.
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]*"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
5494
5495
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, mirroring
    JavaScript's ``>>>`` operator for negative inputs.
    """
    if val < 0:
        # Reinterpret the negative number as its unsigned 32-bit
        # two's-complement representation before shifting.
        val += 0x100000000
    return val >> n
5498
5499
5500 # Based on png2str() written by @gdkchan and improved by @yokrysty
5501 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels).

    pixels is a list of rows; each row is a flat list of byte values.
    NOTE(review): the stride of width * 3 assumes 3 bytes per pixel
    (8-bit truecolor, no alpha) and non-interlaced data — other PNG
    layouts would decode incorrectly. Confirm against callers.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the 8-byte PNG signature and that the first chunk is IHDR.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes, chosen by input length.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, payload, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed to be the first chunk by the check above.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across several IDAT chunks; concatenate them
    # before inflating.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per scanline (3 bytes per pixel); each scanline is preceded by
    # one filter-type byte in the decompressed stream.
    stride = width * 3
    pixels = []

    # Look up an already-reconstructed byte by its flat index.
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # The 'left' neighbour is the same channel of the previous
            # pixel, i.e. 3 bytes back — it only exists from x >= 3 on.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec section 9).
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # Upper-left neighbour, when both neighbours exist.
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick the neighbour closest to p,
                # preferring left, then up, then upper-left.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5605
5606
def write_xattr(path, key, value):
    """Set the extended attribute *key* on the file at *path* to *value*
    (bytes; it is decoded as UTF-8 when handed to command-line tools).

    Implementations are tried in order: the pyxattr/xattr Python modules,
    NTFS Alternate Data Streams on Windows, then the setfattr/xattr CLI
    tools. Raises XAttrUnavailableError when no usable implementation is
    found and XAttrMetadataError when setting the attribute fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both the 'pyxattr' and 'xattr' PyPI packages import as 'xattr'
        # but expose different APIs; distinguish them by attribute.
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # ':' is the ADS separator, so it must not appear in the key.
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Probe for the CLI fallbacks before deciding how to proceed.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5689
5690
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the (stringified)
    year, month and day of a uniformly random date between 1950-01-01 and
    1995-12-31 inclusive.
    """
    first_day = datetime.date(1950, 1, 1)
    last_day = datetime.date(1995, 12, 31)
    span_days = (last_day - first_day).days
    birthday = first_day + datetime.timedelta(days=random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }