Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/utils.py
Prepare new version
[youtubedl] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import contextlib
11 import ctypes
12 import datetime
13 import email.utils
14 import email.header
15 import errno
16 import functools
17 import gzip
18 import io
19 import itertools
20 import json
21 import locale
22 import math
23 import operator
24 import os
25 import platform
26 import random
27 import re
28 import socket
29 import ssl
30 import subprocess
31 import sys
32 import tempfile
33 import traceback
34 import xml.etree.ElementTree
35 import zlib
36
37 from .compat import (
38 compat_HTMLParseError,
39 compat_HTMLParser,
40 compat_basestring,
41 compat_chr,
42 compat_cookiejar,
43 compat_ctypes_WINFUNCTYPE,
44 compat_etree_fromstring,
45 compat_expanduser,
46 compat_html_entities,
47 compat_html_entities_html5,
48 compat_http_client,
49 compat_kwargs,
50 compat_os_name,
51 compat_parse_qs,
52 compat_shlex_quote,
53 compat_str,
54 compat_struct_pack,
55 compat_struct_unpack,
56 compat_urllib_error,
57 compat_urllib_parse,
58 compat_urllib_parse_urlencode,
59 compat_urllib_parse_urlparse,
60 compat_urllib_parse_unquote_plus,
61 compat_urllib_request,
62 compat_urlparse,
63 compat_xpath,
64 )
65
66 from .socks import (
67 ProxyType,
68 sockssocket,
69 )
70
71
def register_socks_protocols():
    """Add the SOCKS URL schemes to ``compat_urlparse.uses_netloc``.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs with protocols not in
    urlparse.uses_netloc are not handled correctly.  Schemes that are
    already listed are left alone, so calling this repeatedly adds nothing
    new.
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            # Already registered; do not append a duplicate.
            continue
        netloc_schemes.append(socks_scheme)
79
80
# The class of compiled regular expression objects is not clearly defined
# otherwise, so derive it from the result of compiling a throwaway pattern.
compiled_regex_type = type(re.compile(''))
83
84
def random_user_agent():
    """Return a Chrome User-Agent string with a randomly picked version.

    A single entry of the hard-coded Chrome version tuple below is selected
    with ``random.choice`` and substituted into a fixed
    "Windows NT 10.0; Win64; x64" Chrome User-Agent template.
    """
    ua_template = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Candidate Chrome version numbers; order and contents are kept as-is so
    # that a seeded ``random`` yields the same pick as before.
    chrome_builds = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    picked_build = random.choice(chrome_builds)
    return ua_template % picked_build
1666
1667
# Default HTTP request headers. The User-Agent is picked once, when this
# module is imported, by calling random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the default of `default=` parameters so that an
# explicitly passed None can be told apart from "no default given".
NO_DEFAULT = object()
1683
# English month names, January first.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code; every list starts with January.
# The French names need their accented characters intact to match real input.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1694
# File extensions (without the leading dot) known to this module.
# NOTE(review): presumably the audio/video/manifest extensions callers accept
# when guessing a file type -- the consumers are outside this view; confirm.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1709
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. The two sequences
# are paired positionally by zip(), so the key string must contain exactly
# one character per replacement (multi-letter replacements such as 'AE' are
# passed as list items so they stay a single element).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1714
# strptime()-style format strings that do not depend on whether a purely
# numeric date is written day-first or month-first.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The common formats plus numeric formats that put the day before the month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# The common formats plus numeric formats that put the month before the day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1769
# Matches the argument list `}('<payload>',<int>,<int>,'<a|b|c>'.split('|')`
# and captures the payload, the two integers and the '|'-joined word list.
# NOTE(review): presumably the tail of packer-obfuscated JavaScript -- confirm.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive,
# dot matches newlines) and captures its body in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1772
1773
def preferredencoding():
    """Return the encoding to use on this system.

    Asks locale.getpreferredencoding() and falls back to 'UTF-8' when the
    lookup fails or reports an encoding that cannot actually be used.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec; an unknown or unusable name raises here.
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
1787
1788
def write_json_file(obj, fn):
    """Serialize obj as JSON into the file fn, atomically if possible.

    The JSON is first written to a temporary '<basename>.*.tmp' file in the
    destination directory and then renamed over fn. If anything fails the
    temporary file is removed and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is given
        # unicode objects, so decode both parts.
        def path_basename(f):
            return os.path.basename(fn).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(fn).decode(fs_encoding)
    else:
        path_basename, path_dirname = os.path.basename, os.path.dirname

    tmp_kwargs = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # json.dump expects a byte stream on Python 2.x and a character stream
    # on Python 3.x
    if sys.version_info < (3, 0):
        tmp_kwargs['mode'] = 'wb'
    else:
        tmp_kwargs['mode'] = 'w'
        tmp_kwargs['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_kwargs))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # os.rename raises WindowsError or FileExistsError on Windows
            # when the target exists, so drop the old file first.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then propagate.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1841
1842
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first xpath match that has attribute key (equal to val, if given) """
        # Manual scan over all matches instead of an attribute predicate.
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
1857
1858 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1859 # the namespace parameter
1860
1861
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{namespace}tag'.

    ns_map maps prefixes to namespace URIs; steps without a prefix are kept
    as they are.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
1872
1873
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node matching xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order until one matches.

    When nothing matches: return default if one was supplied, otherwise
    raise ExtractorError if fatal is set, otherwise return None. name is
    used in the error message in place of xpath when given.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Start from "not found" so that an empty iterable of expressions falls
    # through to the default/fatal handling below instead of failing with
    # UnboundLocalError.
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1895
1896
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element(). An element without
    text yields default if supplied, raises ExtractorError if fatal is set,
    and returns None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Either nothing was found or xpath_element() handed back the default.
    if element is None or element == default:
        return element

    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % label)
    return None
1910
1911
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first xpath match that carries it.

    When no such element exists: return default if supplied, raise
    ExtractorError if fatal is set, and return None otherwise.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
1923
1924
def get_element_by_id(id, html):
    """Return the content of the first tag in html whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
1928
1929
def get_element_by_class(class_name, html):
    """Return the content of the first tag in html carrying class class_name, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1934
1935
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag in html whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1939
1940
def get_elements_by_class(class_name, html):
    """Return the contents of all tags in html carrying class class_name, as a list"""
    # The class attribute may hold several names; match class_name as a
    # whole word anywhere inside the attribute value.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1946
1947
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is inserted into the pattern as a regular expression. Each returned
    content string has its HTML entities unescaped.
    """

    value = re.escape(value) if escape_value else value

    retlist = []
    # Verbose-mode pattern: opening tag name, any attributes before the one
    # searched for, the attribute itself (optionally quoted), any attributes
    # after it, then the lazily matched content up to the closing tag of the
    # same name.
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Content that starts with a quote has its first and last
        # character dropped.
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
1971
1972
class HTMLAttributeParser(compat_HTMLParser):
    """Minimal HTML parser that records the attributes of an element.

    After feeding markup, `attrs` holds the attributes of the most recently
    seen start tag as a dict (empty if no start tag was seen).
    """

    def __init__(self):
        # Set before the base initializer runs, as the original code does.
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as a sequence of (name, value) pairs.
        self.attrs = dict(attrs)
1981
1982
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # keep whatever attributes were collected before the error.
        pass
    return attr_parser.attrs
2007
2008
def clean_html(html):
    """Turn an HTML snippet into readable plain text (None is passed through)"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Literal newlines carry no meaning in HTML; <br> and paragraph
    # boundaries do.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop all remaining tags, then decode the entities
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
2024
2025
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' selects standard output (its binary buffer when available). If
    opening fails for a reason other than EACCES, the name is passed through
    sanitize_path() and, when that changes it, opened again; an error from
    that second attempt propagates to the caller.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
        return (out, filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2056
2057
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a timestamp (None if it cannot be parsed)"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2065
2066
def sanitize_filename(s, restricted=False, is_id=False):
    """Make s usable as part of a filename.

    With restricted set, a stricter subset of characters is allowed.
    Set is_id if s is an ID rather than an arbitrary string: the result is
    then only cleaned character by character and otherwise left alone.
    """
    def replace_insane(char):
        code = ord(char)
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        # '?' and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
2106
2107
def sanitize_path(s):
    """Sanitize and normalize a path on Windows; return s unchanged elsewhere"""
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if not drive_or_unc and sys.version_info < (2, 7):
        drive_or_unc = os.path.splitunc(s)[0]

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)

    cleaned = []
    for part in parts:
        if part in ['.', '..']:
            cleaned.append(part)
        else:
            # Replace forbidden characters and a trailing space or dot
            cleaned.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))

    if drive_or_unc:
        cleaned.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*cleaned)
2124
2125
def sanitize_url(url):
    """Repair a few common URL defects: a missing scheme and known scheme typos"""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url

    # Common typos seen so far, as (pattern, replacement) pairs; every
    # pattern is anchored at the start of the URL.
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, replacement in typo_fixes:
        fixed, count = re.subn(typo_re, replacement, url)
        if count:
            # Only the first matching typo is repaired
            return fixed
    return url
2142
2143
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after passing it through sanitize_url()"""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2146
2147
def expand_path(s):
    """Expand ~ first, then shell variables, in the path s"""
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2151
2152
def orderedSet(iterable):
    """ Return the items of iterable as a list without duplicates, in first-seen order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2160
2161
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity text without the leading '&' but
    including the trailing ';' (e.g. 'amp;' or '#x41;'). Entities that are
    unknown or do not denote a valid character are returned in their literal
    '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # A code point outside the valid range raises ValueError; one too
        # large for a C integer raises OverflowError. Neither denotes a
        # character, so both fall through to the literal form.
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2191
2192
def unescapeHTML(s):
    """Replace the HTML entities in s by their characters (None is passed through)"""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(mobj):
        # Group 1 is the entity without '&' but with the closing ';'
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _decode_entity, s)
2200
2201
def get_subprocess_encoding():
    """Return the encoding for subprocess arguments; 'utf-8' if none is reported"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2212
2213
def encodeFilename(s, for_subprocess=False):
    """Return the file name s in the form the platform APIs expect.

    @param s                The name of the file
    @param for_subprocess   Set when the name is passed to a subprocess
    """

    assert type(s) == compat_str

    keep_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if keep_unicode:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2236
2237
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string file name on Python 2; return b unchanged otherwise"""

    # Nothing to do on Python 3 or for values that are not byte strings
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
2247
2248
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename() in subprocess mode"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2256
2257
def decodeArgument(b):
    """Decode a command-line argument via decodeFilename() in subprocess mode"""
    return decodeFilename(b, for_subprocess=True)
2260
2261
def decodeOption(optval):
    """Return an option value as text, decoding byte strings with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return optval

    text = optval
    if isinstance(text, bytes):
        text = text.decode(preferredencoding())

    assert isinstance(text, compat_str)
    return text
2270
2271
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The hour and minute forms are only used above 3600 and 60 seconds
    respectively, so exactly 3600 is rendered as '60:00' and 60 as '60'.
    """
    if secs > 3600:
        hours = secs // 3600
        minutes = (secs % 3600) // 60
        return '%d:%02d:%02d' % (hours, minutes, secs % 60)
    if secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    return '%d' % secs
2279
2280
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler configured for the running Python.

    params is the options mapping; its 'nocheckcertificate' entry (default
    False) turns certificate verification off. Extra keyword arguments are
    forwarded to YoutubeDLHTTPSHandler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname is cleared before verify_mode is relaxed
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            # Fall through to the version-based handling below.
            pass

    if sys.version_info < (3, 2):
        # No SSL context is passed on these versions
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2304
2305
def bug_reports_message():
    """Return the text appended to unexpected errors, asking the user to report a bug.

    The update hint depends on whether ytdl_is_updateable() is true.
    """
    if ytdl_is_updateable():
        update_cmd = 'type  youtube-dl -U  to update'
    else:
        update_cmd = 'see  https://yt-dl.org/update  on how to update'
    return (
        '; please report this issue on https://yt-dl.org/bug .'
        + ' Make sure you are using the latest version; %s.' % update_cmd
        + ' Be sure to call youtube-dl with the --verbose flag and include its complete output.')
2315
2316
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions."""
2320
2321
class ExtractorError(YoutubeDLError):
    """Raised when extracting information fails."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause is appended to the message; video_id is prepended to it.
        """

        # Errors raised while one of these is being handled are expected
        # and not a bug in youtube-dl.
        expected_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        expected = expected or sys.exc_info()[0] in expected_types

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2349
2350
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported; keeps the URL in `url`."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2356
2357
class RegexNotFoundError(ExtractorError):
    """Raised when a regular expression did not match."""
2361
2362
class GeoRestrictedError(ExtractorError):
    """Geographic restriction error.

    May be raised when a video cannot be accessed from the user's
    geographic location because of restrictions imposed by a website.
    Always an expected error; `countries` carries whatever the raiser
    passed.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2373
2374
class DownloadError(YoutubeDLError):
    """Download error.

    May be raised by FileDownloader objects that are not configured to
    continue on errors; the exception carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the sys.exc_info() of the exception that caused the trouble. """
        super(DownloadError, self).__init__(msg)
        # Kept so callers can inspect or re-report the original exception
        self.exc_info = exc_info
2387
2388
class SameFileError(YoutubeDLError):
    """Same-file error.

    Raised by FileDownloader objects when they detect that several files
    would have to be downloaded to the same file on disk.
    """
2396
2397
class PostProcessingError(YoutubeDLError):
    """Post-processing error.

    May be raised by a PostProcessor's .run() method to signal that the
    postprocessing task failed. The message is also available as `msg`.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
2408
2409
class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """
2413
2414
class UnavailableVideoError(YoutubeDLError):
    """Unavailable format error.

    Raised when a video is requested in a format that is not available for
    that video.
    """
2422
2423
class ContentTooShortError(YoutubeDLError):
    """Content-too-short error.

    May be raised by FileDownloader objects when a downloaded file is
    smaller than what the server announced first, which indicates that the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2439
2440
class XAttrMetadataError(YoutubeDLError):
    """Error while handling extended-attribute metadata.

    `code` is an errno value (or None) and `msg` the error text. From these
    a coarse `reason` is derived: 'NO_SPACE', 'VALUE_TOO_LONG' or
    'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # The message checks cover callers that only have the error text;
        # they must use the spelling the system actually emits
        # ("Disk quota exceeded").
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2455
2456
class XAttrUnavailableError(YoutubeDLError):
    """Extended attributes are unavailable.

    NOTE(review): meaning inferred from the name; the raise sites are
    outside this part of the file -- confirm.
    """
2459
2460
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, bind it to a source address.

    ydl_handler supplies the options through its `_params` mapping; only
    'source_address' is read here. is_https selects TLS wrapping in the
    Python 2.6 fallback. Remaining arguments are passed to http_class.
    Returns the connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            # Keep only remote addresses of the same family as the source
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the socket before
                    # moving on to the next candidate
                    err = _
                    if sock is not None:
                        sock.close()
            # Every candidate failed: re-raise the last error, if any
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is paired with the configured source address
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() with a version
            # that opens the socket through _create_connection itself.
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2524
2525
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker header.

    Without the marker, headers is returned as is (the same object). With
    it, a new dict is returned that has neither the marker nor any
    Accept-Encoding header (matched case-insensitively).
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2534
2535
2536 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2537 """Handler for HTTP requests and responses.
2538
2539 This class, when installed with an OpenerDirector, automatically adds
2540 the standard headers to every HTTP request and handles gzipped and
2541 deflated responses from web servers. If compression is to be avoided in
2542 a particular request, the original request in the program code only has
2543 to include the HTTP header "Youtubedl-no-compression", which will be
2544 removed before making the real request.
2545
2546 Part of this code was copied from:
2547
2548 http://techknack.net/python-urllib2-handlers/
2549
2550 Andrew Rowls, the author of that code, agreed to release it to the
2551 public domain.
2552 """
2553
2554 def __init__(self, params, *args, **kwargs):
2555 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2556 self._params = params
2557
2558 def http_open(self, req):
2559 conn_class = compat_http_client.HTTPConnection
2560
2561 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2562 if socks_proxy:
2563 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2564 del req.headers['Ytdl-socks-proxy']
2565
2566 return self.do_open(functools.partial(
2567 _create_http_connection, self, conn_class, False),
2568 req)
2569
2570 @staticmethod
2571 def deflate(data):
2572 try:
2573 return zlib.decompress(data, -zlib.MAX_WBITS)
2574 except zlib.error:
2575 return zlib.decompress(data)
2576
def http_request(self, req):
    """Normalise an outgoing request and return it.

    The URL is percent-encoded, missing standard headers are added and
    the header set is passed through ``handle_youtubedl_headers``.
    """
    # According to RFC 3986, URLs can not contain non-ASCII characters,
    # however this is not always respected by websites, some tend to give
    # out URLs with non percent-encoded non-ASCII characters (see
    # telemb.py, ard.py [#3412]).
    # urllib chokes on such URLs (see http://bugs.python.org/issue3991),
    # so the request's URL is replaced with a percent-encoded one.
    # Redirects are affected too
    # (e.g. http://www.southpark.de/alle-episoden/s18e09), which is why
    # this workaround lives here rather than in YoutubeDL.urlopen().
    original_url = req.get_full_url()
    escaped_url = escape_url(original_url)
    if escaped_url != original_url:
        # Substitute the URL only if escaping changed anything
        req = update_Request(req, url=escaped_url)

    for name, value in std_headers.items():
        # Capitalize is needed because of Python bug 2275:
        # http://bugs.python.org/issue2275
        # urllib capitalizes the dict keys because of this bug.
        if name.capitalize() in req.headers:
            continue
        req.add_header(name, value)

    req.headers = handle_youtubedl_headers(req.headers)

    needs_fragment_fix = sys.version_info < (2, 7) and '#' in req.get_full_url()
    if needs_fragment_fix:
        # Python 2.6 is brain-dead when it comes to fragments
        req._Request__original = req._Request__original.partition('#')[0]
        req._Request__r_type = req._Request__r_type.partition('#')[0]

    return req
2607
def http_response(self, req, resp):
    """Post-process a response and return it.

    gzip- and deflate-encoded bodies are decompressed and re-wrapped in a
    fresh ``addinfourl`` object, and the ``Location`` header of 3xx
    responses is percent-encoded.
    """
    old_resp = resp

    def gunzip(payload):
        return gzip.GzipFile(fileobj=io.BytesIO(payload), mode='rb').read()

    def rewrap(stream):
        wrapped = compat_urllib_request.addinfourl(
            stream, old_resp.headers, old_resp.url, old_resp.code)
        wrapped.msg = old_resp.msg
        return wrapped

    # gzip
    if resp.headers.get('Content-encoding', '') == 'gzip':
        content = resp.read()
        try:
            body = gunzip(content)
        except IOError as original_ioerror:
            # There may be junk at the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            for trim in range(1, 1024):
                try:
                    body = gunzip(content[:-trim])
                except IOError:
                    continue
                break
            else:
                raise original_ioerror
        resp = rewrap(io.BytesIO(body))
        del resp.headers['Content-encoding']

    # deflate
    if resp.headers.get('Content-encoding', '') == 'deflate':
        resp = rewrap(io.BytesIO(self.deflate(resp.read())))
        del resp.headers['Content-encoding']

    # Percent-encode redirect URL of Location HTTP header to satisfy
    # RFC 3986 (see https://github.com/ytdl-org/youtube-dl/issues/6457).
    if 300 <= resp.code < 400:
        location = resp.headers.get('Location')
        if location:
            is_py3 = sys.version_info >= (3, 0)
            # As of RFC 2616 default charset is iso-8859-1 that is
            # respected by python 3
            if is_py3:
                location = location.encode('iso-8859-1').decode('utf-8')
            else:
                location = location.decode('utf-8')
            location_escaped = escape_url(location)
            if location_escaped != location:
                del resp.headers['Location']
                if not is_py3:
                    location_escaped = location_escaped.encode('utf-8')
                resp.headers['Location'] = location_escaped
    return resp
2654
# HTTPS requests and responses are processed by the very same methods as
# their plain-HTTP counterparts.
https_request = http_request
https_response = http_response
2657
2658
def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that tunnels through a SOCKS proxy.

    base_class -- ``HTTPConnection`` or ``HTTPSConnection`` (or a subclass).
    socks_proxy -- proxy URL; the scheme selects the protocol
                   (``socks5``, ``socks4``/``socks`` or ``socks4a``), the
                   port defaults to 1080 and percent-encoded credentials
                   in the URL are decoded.

    Returns a subclass of *base_class* whose ``connect()`` goes through
    the proxy. Raises ValueError for an unsupported proxy scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on socks_type.
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket.
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2700
2701
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler with a configurable connection class and SOCKS support."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Store *params* and the connection class to use.

        *https_conn_class* falls back to ``HTTPSConnection`` when not
        given; remaining arguments go to ``HTTPSHandler.__init__``.
        """
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open *req* over HTTPS, via a SOCKS proxy when the request asks for one."""
        open_kwargs = {}
        connection_cls = self._https_conn_class

        # Forward whichever SSL-related attributes this Python version's
        # HTTPSHandler provides.
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname

        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_cls = make_socks_conn_class(connection_cls, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_cls, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2725
2726
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """Mozilla-format cookie jar that copes with ``#HttpOnly_`` lines and
    with session cookies stored as ``expires=0``."""

    _HTTPONLY_PREFIX = '#HttpOnly_'

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Parameters are passed on to ``MozillaCookieJar.save``.
        """
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0
        compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        *filename* defaults to ``self.filename``; ValueError is raised
        when neither is set. The file is read as UTF-8.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        cf = io.StringIO()
        # Read with an explicit encoding: the built-in open() uses the
        # locale encoding on Python 3 and yields byte strings on Python 2,
        # both of which break on non-ASCII cookie values.
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                # Strip the prefix so the line is parsed as a regular
                # cookie entry instead of being skipped as a comment.
                if line.startswith(self._HTTPONLY_PREFIX):
                    line = line[len(self._HTTPONLY_PREFIX):]
                cf.write(compat_str(line))
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as
        # session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2768
2769
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same handling to HTTP and HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to ``HTTPCookieProcessor.http_response``."""
        # Python 2 will choke on next HTTP request in row if there are
        # non-ASCII characters in Set-Cookie HTTP header of last response
        # (see https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode
        # Set-Cookie header before HTTPCookieProcessor starts processing it.
        # (The workaround below is currently disabled.)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        parent = compat_urllib_request.HTTPCookieProcessor
        return parent.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2792
2793
def extract_timezone(date_str):
    """Split a trailing UTC offset off *date_str*.

    Recognises a final ``Z`` or a ``+HH:MM`` / ``-HHMM`` style offset
    (optionally preceded by one space) after at least 8 characters.
    Returns ``(offset, remaining)`` where *offset* is a ``timedelta``
    (zero when nothing was recognised or for ``Z``) and *remaining* is
    the string without the offset.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    remaining = date_str[:-len(tz_match.group('tz'))]
    if not tz_match.group('sign'):
        return datetime.timedelta(), remaining

    direction = 1 if tz_match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remaining
2810
2811
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    Fractional seconds are dropped. When *timezone* (a timedelta) is not
    supplied it is extracted from the string. Returns None for a None
    input or when the string does not match the expected format.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_dt.timetuple())
    except ValueError:
        return None
2829
2830
def date_formats(day_first=True):
    """Return the day-first or the month-first list of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2833
2834
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when *date_str* is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    _, cleaned = extract_timezone(cleaned)

    upload_date = None
    # Every format is tried; a later matching format overrides an earlier one.
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
2861
2862
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from *date_str*.

    The first matching entry of ``date_formats(day_first)`` is used, with
    an RFC 2822 style parse as fallback. Returns None when *date_str* is
    None or nothing matches.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # 12 hours are added whenever "PM" occurs anywhere in the string
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_tail = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if tz_tail:
        date_str = date_str[:-len(tz_tail.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    micro = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if micro:
        date_str = micro.group(1)

    pm_shift = datetime.timedelta(hours=pm_delta)
    for fmt in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, fmt) - timezone + pm_shift
            return calendar.timegm(dt.timetuple())
        except ValueError:
            continue

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
2894
2895
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*.

    The text after the last dot of the part before ``?`` is used when it
    is purely alphanumeric, or when (after stripping trailing slashes) it
    is one of KNOWN_EXTENSIONS; otherwise *default_ext* is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    guess = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
2907
2908
def subtitles_filename(filename, sub_lang, sub_format):
    """Replace the extension of *filename* with ``<sub_lang>.<sub_format>``."""
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + sub_lang + '.' + sub_format
2911
2912
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    ``yesterday`` is accepted as well. Months and years are approximated
    as 30 and 365 days.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # A bad approximation?
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'

    return today + datetime.timedelta(**{unit + 's': amount})
2940
2941
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged.
    """
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
2950
2951
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest / latest representable
        date. Raises ValueError when start lies after end.
        """
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
2981
2982
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding.
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
2991
2992
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The string is written with the Win32 ``WriteConsoleW`` call when *out*
    has file descriptor 1 or 2 and the matching standard handle is a
    console. Raises OSError when ``WriteConsoleW`` reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle (presumably
    # STD_OUTPUT_HANDLE / STD_ERROR_HANDLE; confirm against the Win32 docs).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the count written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for a missing/invalid handle, for a handle whose file type
        # (ignoring the REMOTE bit) is not CHAR, or when GetConsoleMode fails.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before the
        # next non-BMP character.
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character; a length of
        # 2 is passed for it instead.
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3066
3067
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (``sys.stderr`` by default) and flush.

    On Windows, when no explicit *encoding* is given, the console API is
    tried first. Otherwise the text is encoded (ignoring unencodable
    characters) for binary streams and streams exposing ``buffer``, and
    written as-is to anything else.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
3088
3089
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values.

    An empty input gives ``[]``.
    """
    if not bs:
        return []
    # Python 3 byte strings already yield ints when indexed
    if isinstance(bs[0], int):
        return list(bs)
    return [ord(ch) for ch in bs]
3097
3098
def intlist_to_bytes(xs):
    """Pack a sequence of integers (one unsigned byte each) into a byte string."""
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3103
3104
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Structure passed to LockFileEx / UnlockFileEx below
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock *f* with LockFileEx; raise OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file can pass the same
        # structure again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is presumably LOCKFILE_EXCLUSIVE_LOCK; confirm against the
        # Win32 docs
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock a file previously locked with _lock_file; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on *f*."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock on *f*."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Without fcntl both operations always fail with IOError
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3178
3179
class locked_file(object):
    """File wrapper that holds a lock for the duration of a ``with`` block.

    The file is opened on construction; the lock is taken on entering the
    context (shared for mode ``'r'``, exclusive otherwise) and released,
    together with the file, on leaving it.
    """

    def __init__(self, filename, mode, encoding=None):
        """Open *filename* in *mode* (``'r'``, ``'a'`` or ``'w'``)."""
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 _lock_file raises OSError, which is not an IOError
            # on Python 2; catch both so the file never stays open when
            # locking fails.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        """Forward to the underlying file's ``write``."""
        return self.f.write(*args)

    def read(self, *args):
        """Forward to the underlying file's ``read``."""
        return self.f.read(*args)
3209
3210
def get_filesystem_encoding():
    """Return ``sys.getfilesystemencoding()``, or 'utf-8' when that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3214
3215
def shell_quote(args):
    """Quote every argument for the shell and join them with spaces."""
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
3225
3226
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    *data* (a dict) is JSON-encoded into the URL fragment. Data already
    smuggled into *url* is kept and takes precedence over *data* for
    duplicate keys. The caller's *data* dict is left untouched.
    """
    url, idata = unsmuggle_url(url, {})
    # Work on a copy so the caller's dict is not modified as a side effect
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
3235
3236
def unsmuggle_url(smug_url, default=None):
    """Split smuggled data off a URL.

    Returns ``(url, data)``; for a URL without smuggled data this is
    ``(smug_url, default)``.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    url, _, fragment = smug_url.rpartition('#')
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(encoded)
3244
3245
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. ``'1.50KiB'``.

    bytes -- number of bytes (number or numeric string), or None.

    Returns 'N/A' for None. Values beyond the largest unit are expressed
    in YiB, values below one byte (including negative ones, by magnitude)
    never select a unit smaller than B.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    value = float(bytes)

    # Determine the unit by repeated (exact) division instead of math.log:
    # this cannot pick the wrong unit through rounding, cannot run past
    # either end of the suffix table and copes with zero and negatives.
    magnitude = abs(value)
    exponent = 0
    while magnitude >= 1024.0 and exponent < len(suffixes) - 1:
        magnitude /= 1024.0
        exponent += 1

    converted = value / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3258
3259
def lookup_unit_table(unit_table, s):
    """Parse ``<number><unit>`` at the start of *s* and scale the number.

    *unit_table* maps unit names to multipliers; a comma is accepted as
    decimal separator. Returns the scaled value as int, or None when *s*
    does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
3269
3270
def parse_filesize(s):
    """Parse a human-readable file size such as ``'1.5 MiB'`` into bytes.

    Returns None for a None input or when no known unit is found.
    """
    if s is not None:
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        size_units = {
            'B': 1,
            'b': 1,
            'bytes': 1,
            'KiB': 1024,
            'KB': 1000,
            'kB': 1024,
            'Kb': 1000,
            'kb': 1000,
            'kilobytes': 1000,
            'kibibytes': 1024,
            'MiB': 1024 ** 2,
            'MB': 1000 ** 2,
            'mB': 1024 ** 2,
            'Mb': 1000 ** 2,
            'mb': 1000 ** 2,
            'megabytes': 1000 ** 2,
            'mebibytes': 1024 ** 2,
            'GiB': 1024 ** 3,
            'GB': 1000 ** 3,
            'gB': 1024 ** 3,
            'Gb': 1000 ** 3,
            'gb': 1000 ** 3,
            'gigabytes': 1000 ** 3,
            'gibibytes': 1024 ** 3,
            'TiB': 1024 ** 4,
            'TB': 1000 ** 4,
            'tB': 1024 ** 4,
            'Tb': 1000 ** 4,
            'tb': 1000 ** 4,
            'terabytes': 1000 ** 4,
            'tebibytes': 1024 ** 4,
            'PiB': 1024 ** 5,
            'PB': 1000 ** 5,
            'pB': 1024 ** 5,
            'Pb': 1000 ** 5,
            'pb': 1000 ** 5,
            'petabytes': 1000 ** 5,
            'pebibytes': 1024 ** 5,
            'EiB': 1024 ** 6,
            'EB': 1000 ** 6,
            'eB': 1024 ** 6,
            'Eb': 1000 ** 6,
            'eb': 1000 ** 6,
            'exabytes': 1000 ** 6,
            'exbibytes': 1024 ** 6,
            'ZiB': 1024 ** 7,
            'ZB': 1000 ** 7,
            'zB': 1024 ** 7,
            'Zb': 1000 ** 7,
            'zb': 1000 ** 7,
            'zettabytes': 1000 ** 7,
            'zebibytes': 1024 ** 7,
            'YiB': 1024 ** 8,
            'YB': 1000 ** 8,
            'yB': 1024 ** 8,
            'Yb': 1000 ** 8,
            'yb': 1000 ** 8,
            'yottabytes': 1000 ** 8,
            'yobibytes': 1024 ** 8,
        }
        return lookup_unit_table(size_units, s)
    return None
3340
3341
def parse_count(s):
    """Parse a count such as ``'1,234'`` or ``'1.5M'`` into an int.

    Returns None for a None input or an unrecognised string.
    """
    if s is None:
        return None

    text = s.strip()

    # Plain numbers, possibly with thousands separators
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, text)
3361
3362
def parse_resolution(s):
    """Extract video dimensions from a free-form string.

    Recognised forms, in order of precedence:
    ``<w>x<h>`` (separator ``x``, ``X`` or the multiplication sign) gives
    ``width`` and ``height``; ``<n>p`` / ``<n>i`` gives ``height``;
    ``4k`` / ``8k`` gives a ``height`` of 2160 / 4320.

    Returns a dict with the keys found; empty for None or no match.
    """
    if s is None:
        return {}

    # \xd7 is the multiplication sign (U+00D7); written as an escape so the
    # pattern does not depend on the source file's encoding.
    mobj = re.search(r'\b(?P<w>\d+)\s*[xX\xd7]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

    return {}
3383
3384
def parse_bitrate(s):
    """Return the number in front of ``kbps`` in *s* as int.

    Returns None when *s* is not a string or has no such number.
    """
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    if found is None:
        return None
    return int(found.group(1))
3391
3392
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name

    Month names for *lang* are used when available, English ones
    otherwise. Returns None for an unknown name.
    """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    for number, month in enumerate(month_names, 1):
        if month == name:
            return number
    return None
3402
3403
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations

    The abbreviation is the first three letters of the English month
    name. Returns None when nothing matches.
    """
    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
3412
3413
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML

    Ampersands that already start one of the predefined entities or a
    numeric character reference of up to four digits are left alone.
    """
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3420
3421
def setproctitle(title):
    """Try to set the process title to *title* via libc's ``prctl``.

    This is best effort: it silently does nothing on Jython, when
    ``libc.so.6`` cannot be loaded or when the library has no ``prctl``.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes (rather than from their
    # length) makes ctypes allocate one extra byte, so the string handed
    # to prctl is always NUL-terminated.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is presumably PR_SET_NAME; confirm against <sys/prctl.h>
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3446
3447
def remove_start(s, start):
    """Return *s* without the prefix *start*; *s* itself (or None) otherwise."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3450
3451
def remove_end(s, end):
    """Return *s* without the suffix *end*; *s* itself (or None) otherwise.

    An empty *end* leaves *s* unchanged.
    """
    if s is None or not s.endswith(end):
        return s
    # Slice by absolute position: s[:-len(end)] would be s[:0] == '' for an
    # empty suffix.
    return s[:len(s) - len(end)]
3454
3455
def remove_quotes(s):
    """Strip one pair of matching single or double quotes around *s*.

    None and strings shorter than two characters are returned as-is.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3463
3464
def url_basename(url):
    """Return the last segment of the path component of *url*."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3468
3469
def base_url(url):
    """Return *url* up to and including the last ``/`` before any query or fragment.

    Only http(s) URLs match; anything else fails with AttributeError.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3472
3473
def urljoin(base, path):
    """Join *path* onto *base*.

    Byte strings are decoded as UTF-8. A *path* that already is an
    absolute or protocol-relative URL is returned unchanged. Returns None
    when *path* is empty or not a string, or when *base* is not an
    http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3487
3488
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
3492
3493
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3497
3498
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert *v* to int, returning *default* when that is not possible.

    With *get_attr*, the named attribute of *v* is converted instead.
    The result is multiplied by *invscale* and floor-divided by *scale*.
    None and the empty string give *default*.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3511
3512
def str_or_none(v, default=None):
    """Return *v* converted to text, or *default* when *v* is None."""
    if v is None:
        return default
    return compat_str(v)
3515
3516
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Commas, dots and plus signs are dropped before conversion. None is
    passed through; other unparsable input raises ValueError.
    """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
3523
3524
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert *v* to float, returning *default* when that is not possible.

    The result is multiplied by *invscale* and divided by *scale*.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
3532
3533
def bool_or_none(v, default=None):
    """Return *v* when it is a real bool, *default* otherwise."""
    if isinstance(v, bool):
        return v
    return default
3536
3537
def strip_or_none(v, default=None):
    """Return *v* stripped of surrounding whitespace when it is a text
    string, *default* otherwise."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3540
3541
def url_or_none(url):
    """Return the stripped *url* when it looks like a URL, else None.

    Accepted are text strings of the form ``<scheme>://...`` or
    protocol-relative ``//...``.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
        return candidate
    return None
3547
3548
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in turn: clock style
    (``[[[DD:]HH:]MM:]SS[.ms]``), unit style (ISO 8601 like ``PT1H2M3S``
    or ``1h 2min 3s``) and a single fractional ``N hours`` / ``N mins``.
    Returns None when *s* is not a string or matches none of them.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None

    clock = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    units = None if clock else re.match(
        r'''(?ix)(?:P?
            (?:
                [0-9]+\s*y(?:ears?)?\s*
            )?
            (?:
                [0-9]+\s*m(?:onths?)?\s*
            )?
            (?:
                [0-9]+\s*w(?:eeks?)?\s*
            )?
            (?:
                (?P<days>[0-9]+)\s*d(?:ays?)?\s*
            )?
            T)?
            (?:
                (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
            )?
            (?:
                (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
            )?
            (?:
                (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
            )?Z?$''', s)

    if clock:
        days, hours, mins, secs, ms = clock.groups()
    elif units:
        days, hours, mins, secs, ms = units.groups()
    else:
        fractional = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
        if not fractional:
            return None
        hours, mins = fractional.groups()

    # Components are added from smallest unit to largest, fraction last.
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3605
3606
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* in front of the real extension of *filename*.

    When *expected_real_ext* is given and differs from the actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
3613
3614
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and differs from the actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        stem = name
    else:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3620
3621
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)

    Returns *exe* when the program could be started, False otherwise.
    """
    # None instead of a shared mutable default; any sequence is accepted.
    command = [exe] + list(args or ())
    try:
        subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
3630
3631
def get_exe_version(exe, args=None,
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    args -- arguments that make the program print its version;
            defaults to ``['--version']``.
    version_re, unrecognized -- passed on to ``detect_exe_version``.
    """
    # None instead of a shared mutable default; any sequence is accepted.
    version_args = ['--version'] if args is None else list(args)
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = subprocess.Popen(
            [encodeArgument(exe)] + version_args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3649
3650
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version from the textual output of an executable.

    version_re must contain one capturing group; when omitted, a generic
    "version <token>" pattern is used. Returns the captured group, or
    unrecognized when the pattern is not found in output.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3660
3661
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
3666
3667
class OnDemandPagedList(PagedList):
    """Paged list that calls pagefunc(pagenum) only for the pages it needs.

    With use_cache enabled, every fetched page is kept in a dict keyed by
    page number and reused by later getslice() calls.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        pagesize = self._pagesize
        collected = []
        for pagenum in itertools.count(start // pagesize):
            page_begin = pagenum * pagesize
            page_end = page_begin + pagesize
            if start >= page_end:
                continue

            # Serve the page from the cache when possible, fetch otherwise
            entries = self._cache.get(pagenum) if self._use_cache else None
            if entries is None:
                entries = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = entries

            # Offsets of the requested range inside this page
            if page_begin <= start < page_end:
                lo = start % pagesize
            else:
                lo = 0

            if end is not None and page_begin <= end <= page_end:
                hi = ((end - 1) % pagesize) + 1
            else:
                hi = None

            if lo != 0 or hi is not None:
                entries = entries[lo:hi]
            collected.extend(entries)

            # A page that is not "full" is assumed to be the last one, so
            # there is no need to query any further pages.
            if len(entries) + lo < pagesize:
                break

            # The whole page was consumed and the next one is not wanted.
            if end == page_end:
                break
        return collected
3718
3719
class InAdvancePagedList(PagedList):
    """Paged list whose total number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        pagefunc is only ever called with page numbers below pagecount,
        even when end points past the last page.
        """
        res = []
        start_page = start // self._pagesize
        # Clamp to the known page count so that an 'end' beyond the last
        # entry never triggers requests for pages that do not exist.
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Entries still wanted (None means "everything")
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the request
                    res.extend(page[:only_more])
                    break
            res.extend(page)
        return res
3747
3748
def uppercase_escape(s):
    # Expand every backslash + capital "U" + 8 hex digits sequence in s
    # to the character it denotes; everything else is left untouched.
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
3755
3756
def lowercase_escape(s):
    # Expand every backslash + small "u" + 4 hex digits sequence in s
    # to the character it denotes; everything else is left untouched.
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
3763
3764
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a unicode string is UTF-8 encoded before quoting
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
3770
3771
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped.
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3782
3783
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file object and close it.

    Each line is decoded as UTF-8 when it is not text already, stripped
    of a leading byte order mark and of surrounding whitespace. Blank
    lines and lines starting with '#', ';' or ']' are dropped.

    Returns the remaining lines as a list of strings.
    """
    # A UTF-8 BOM shows up as U+FEFF when the input was decoded as UTF-8,
    # and as the three characters below when its bytes were decoded one by
    # one (e.g. as Latin-1). Neither is removed by strip(), so both forms
    # have to be cut off explicitly.
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3798
3799
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3802
3803
def update_url_query(url, query):
    """Return url with its query string updated from the query mapping.

    Keys already present in the URL are overridden by those in query.
    url is returned unchanged when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
3812
3813
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with selected parts replaced.

    headers are merged over the existing ones, query is merged into the
    URL, and url/data replace the originals when they are truthy. HEAD
    and PUT requests keep their method; the timeout attribute is carried
    over when req has one.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    # Pick the request class that preserves the original HTTP method
    request_cls = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    clone = request_cls(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        clone.timeout = req.timeout
    return clone
3832
3833
def _multipart_encode_impl(data, boundary):
    """Encode the dict data as multipart/form-data using boundary.

    Returns a (body_bytes, content_type) tuple.
    Raises ValueError if the boundary occurs inside any encoded field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    raw_boundary = boundary.encode('ascii')
    delimiter = b'--' + raw_boundary

    body = b''
    for name, value in data.items():
        body += delimiter + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if raw_boundary in part:
            raise ValueError('Boundary overlaps with data')
        body += part

    body += delimiter + b'--\r\n'

    return body, content_type
3854
3855
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated, and regenerated for as long as it
        collides with the data.

    Returns a (body_bytes, content_type) tuple. A ValueError caused by a
    caller-supplied boundary that overlaps with the data is propagated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    caller_supplied = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if caller_supplied:
                raise
            # Random boundary collided with the data: try another one
            boundary = None
3884
3885
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple, in d.

    For a list/tuple of keys, missing keys and None values are always
    skipped; other falsy values are skipped too unless skip_false_values
    is False. default is returned when nothing usable is found.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
3894
3895
def try_get(src, getter, expected_type=None):
    """Apply one getter, or each getter of a list/tuple, to src.

    Getters raising AttributeError, KeyError, TypeError or IndexError are
    skipped. The first result that is an instance of expected_type (or
    any result when expected_type is None) is returned; otherwise None.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for candidate in getters:
        try:
            value = candidate(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
3907
3908
def merge_dicts(*dicts):
    """Merge dicts, giving priority to the earlier ones.

    None values are ignored. A key that is already set is only replaced
    when the stored value is an empty string and the new value is a
    non-empty string.
    """
    merged = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_blank = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_blank:
                    continue
            merged[key] = value
    return merged
3921
3922
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding if needed."""
    # Note: the default encoding is evaluated once, at definition time.
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
3925
3926
# Rating label -> age limit, as looked up by parse_age_limit()
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
3934
3935
# "TV-<suffix>" label -> age limit; parse_age_limit() derives the accepted
# suffixes from these keys (everything after the first three characters).
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
3944
3945
def parse_age_limit(s):
    """Convert an age limit given as int or string to an int.

    Accepted inputs: a plain int in the range 0..21, a one or two digit
    string optionally followed by "+", a key of US_RATINGS, or a
    TV_PARENTAL_GUIDELINES label written as "TV-X", "TV_X" or "TVX".
    Anything else yields None.
    """
    # Exact type check on purpose: subclasses of int (such as bool) are
    # not treated as numeric age limits here.
    if type(s) is int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv:
        return TV_PARENTAL_GUIDELINES['TV-' + tv.group(1)]
    return None
3960
3961
def strip_jsonp(code):
    """Remove a JSONP callback wrapper and return the wrapped payload.

    Handles an optional "window." prefix, the "name && name(...)" form,
    an optional trailing semicolon and trailing // comments. Input that
    does not match the wrapper pattern is returned unchanged.
    """
    wrapper_re = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(wrapper_re, r'\g<callback_data>', code)
3970
3971
def js_to_json(code):
    """Rewrite a JavaScript object/array literal so that it parses as JSON.

    Tokens are handled one by one: comments and trailing commas are
    dropped, quoted strings are re-quoted with double quotes, bare
    identifiers are quoted, and hexadecimal/octal integers (also when used
    as keys) are converted to decimal.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Replacements applied inside quoted strings; any other escape
    # sequence is kept as it is.
    ESCAPE_MAP = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_escape(esc):
        sequence = esc.group(0)
        return ESCAPE_MAP.get(sequence, sequence)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith('/*') or token.startswith('//') or token == ',':
            # Comments and trailing commas are removed
            return ""

        if token[0] in ("'", '"'):
            token = re.sub(r'(?s)\\.|"', fix_escape, token[1:-1])

        for int_re, base in INTEGER_TABLE:
            int_match = re.match(int_re, token)
            if int_match:
                number = int(int_match.group(1), base)
                # A trailing colon marks an object key, which must be quoted
                return '"%d":' % number if token.endswith(':') else '%d' % number

        return '"%s"' % token

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
4011
4012
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # Position of qid in quality_ids, or -1 when it is not listed
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return rank
4021
4022
# Default output template: "<title>-<id>.<ext>" with %(name)s placeholders
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4024
4025
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    # Leave room for the ellipses within the requested length
    keep = length - len(ELLIPSES)
    return s[:keep] + ELLIPSES
4034
4035
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError if any component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
4038
4039
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    When version is empty or either version string cannot be parsed, the
    answer falls back to "not assume_new".
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current = version_tuple(version)
        minimum = version_tuple(limit)
    except ValueError:
        return fallback
    return current < minimum
4047
4048
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when this module was loaded through a zipimporter ...
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    # ... or when the interpreter carries a "frozen" attribute
    return hasattr(sys, 'frozen')
4054
4055
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4059
4060
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    text = str(err)
    if sys.version_info[0] >= 3:
        return text
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return text.decode(preferredencoding())
4068
4069
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few complete types are mapped directly. Otherwise the subtype (the
    part after the last '/', without parameters, lower-cased) is looked up
    in a table and returned as it is when the table has no entry for it.
    Returns None for None.
    """
    if mt is None:
        return None

    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }

    if mt in FULL_TYPE_MAP:
        return FULL_TYPE_MAP[mt]

    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return SUBTYPE_MAP.get(subtype, subtype)
4105
4106
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codecs.

    Returns {} for empty input, otherwise a dict with 'vcodec' and
    'acodec' ('none' for a missing kind). If no codec is recognized and
    exactly two were given, they are taken as video and audio in that
    order; any other unrecognized input yields {}. A warning is written
    to stderr for every unknown codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    codec_list = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in codec_list:
        # Only the part before the first dot identifies the codec family
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(codec_list) == 2:
        return {
            'vcodec': codec_list[0],
            'acodec': codec_list[1],
        }
    return {}
4136
4137
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of url_handle.

    The filename of an "attachment" Content-Disposition header is tried
    first; the Content-Type header is used as the fallback.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        found = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = determine_ext(found.group('filename'), default_ext=None) if found else None
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'))
4150
4151
def encode_data_uri(data, mime_type):
    """Return a base64 "data:" URI holding the bytes data as mime_type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4154
4155
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4164
4165
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The 4-byte UTF-32 marks must be tested before the UTF-16 marks that
    # are prefixes of them.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    text = first_bytes[offset:].decode(encoding, 'replace')

    # The result is the match object (truthy) or None
    return re.match(r'^\s*<', text)
4184
4185
def determine_protocol(info_dict):
    """Return the protocol to use for the format described by info_dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: its rtmp/mms/rtsp prefix, then its m3u8/f4m
    extension, and finally its URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4206
4207
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Width of every column is the length of its longest cell
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]
    # All columns but the last one are left-aligned and padded
    padded = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded) + '%s'
    return '\n'.join(row_format % tuple(row) for row in rows)
4214
4215
4216 def _match_one(filter_part, dct):
4217 COMPARISON_OPERATORS = {
4218 '<': operator.lt,
4219 '<=': operator.le,
4220 '>': operator.gt,
4221 '>=': operator.ge,
4222 '=': operator.eq,
4223 '!=': operator.ne,
4224 }
4225 operator_rex = re.compile(r'''(?x)\s*
4226 (?P<key>[a-z_]+)
4227 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4228 (?:
4229 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4230 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4231 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4232 )
4233 \s*$
4234 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4235 m = operator_rex.search(filter_part)
4236 if m:
4237 op = COMPARISON_OPERATORS[m.group('op')]
4238 actual_value = dct.get(m.group('key'))
4239 if (m.group('quotedstrval') is not None
4240 or m.group('strval') is not None
4241 # If the original field is a string and matching comparisonvalue is
4242 # a number we should respect the origin of the original field
4243 # and process comparison value as a string (see
4244 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4245 or actual_value is not None and m.group('intval') is not None
4246 and isinstance(actual_value, compat_str)):
4247 if m.group('op') not in ('=', '!='):
4248 raise ValueError(
4249 'Operator %s does not support string values!' % m.group('op'))
4250 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4251 quote = m.group('quote')
4252 if quote is not None:
4253 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4254 else:
4255 try:
4256 comparison_value = int(m.group('intval'))
4257 except ValueError:
4258 comparison_value = parse_filesize(m.group('intval'))
4259 if comparison_value is None:
4260 comparison_value = parse_filesize(m.group('intval') + 'B')
4261 if comparison_value is None:
4262 raise ValueError(
4263 'Invalid integer value %r in filter part %r' % (
4264 m.group('intval'), filter_part))
4265 if actual_value is None:
4266 return m.group('none_inclusive')
4267 return op(actual_value, comparison_value)
4268
4269 UNARY_OPERATORS = {
4270 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4271 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4272 }
4273 operator_rex = re.compile(r'''(?x)\s*
4274 (?P<op>%s)\s*(?P<key>[a-z_]+)
4275 \s*$
4276 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4277 m = operator_rex.search(filter_part)
4278 if m:
4279 op = UNARY_OPERATORS[m.group('op')]
4280 actual_value = dct.get(m.group('key'))
4281 return op(actual_value)
4282
4283 raise ValueError('Invalid filter part %r' % filter_part)
4284
4285
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # All '&'-separated parts have to match; stop at the first failure
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4291
4292
def match_filter_func(filter_str):
    """Build a filter callback from filter_str.

    The callback takes an info dict and returns None when it passes the
    filter, or a message explaining why it is skipped otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Name the video by its title, falling back to its id
        fallback_name = info_dict.get('id', 'video')
        video_title = info_dict.get('title', fallback_name)
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4301
4302
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds.

    Supported forms are an offset in seconds ("12", "12.5", "12.5s") and a
    clock time "H:MM:SS" whose fraction is separated by "." or ":".
    Returns a float, or None for empty or unsupported input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
4314
4315
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode "HH:MM:SS,mmm".

    The value is rounded to whole milliseconds before it is split up, so
    that binary floating point error (e.g. 2.3 being stored as
    2.2999999...) cannot drop a millisecond.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    secs, msecs = divmod(remainder, 1000)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, msecs)
4318
4319
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Every <p> element with a usable begin time and end time (or duration)
    becomes one SRT cue. Supported tts:* styling is rendered as <font>,
    <b>, <i> and <u> tags, and <br> elements become newlines.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> element
    '''
    # (current namespace, [legacy namespaces rewritten to it before parsing])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are taken into account
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Shorthand for building namespace-qualified names and paths
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled from the <style> elements below
    styles = {}
    # Style taken from <body>/<div>, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        # Parser target that turns the content of one <p> into SRT markup
        _out = ''
        # NOTE(review): the two lists below are class attributes that are
        # mutated in place (append/pop), so all instances created during
        # one dfxp2srt() call share them - confirm this is intended.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: default style, then the referenced
                # style, then the inline tts:* attributes
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect with this value
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    # Push the style in effect inside this element
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened by the matching start(), innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                # The style stack is only popped when this element opened tags
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the node and feed it through TTMLPElementParser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Rewrite legacy namespaces so that a single set of paths is enough
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Collect the styles. A style may inherit from a parent that has not
    # been collected yet; in that case another pass is made.
    # NOTE(review): a parent id that never becomes available sets 'repeat'
    # on every pass, so this loop would not terminate - confirm that
    # inputs with dangling style references cannot occur.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styles referenced by the first <body> and <div> form the default style
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Cue numbers follow the position among all paragraphs, so skipped
    # paragraphs leave gaps in the numbering.
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Without an end time, fall back to begin + duration
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4482
4483
def cli_option(params, command_option, param):
    """Build [command_option, value] from the entry param of params.

    A truthy value is converted to a string, a falsy non-None value is
    passed on unchanged. A missing or None value yields an empty list.
    """
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
4489
4490
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the command line arguments for a boolean parameter.

    Returns [] when the parameter is missing or None. Otherwise the value
    is rendered as true_value/false_value and returned either as a
    separate argument or, when separator is given, joined to the option.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4499
4500
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4504
4505
def cli_configuration_args(params, param, default=[]):
    """Return the list stored under param in params, or default.

    default is returned (as the very same object) when the entry is
    missing or None.
    """
    ex_args = params.get(param)
    if ex_args is not None:
        assert isinstance(ex_args, list)
        return ex_args
    return default
4512
4513
class ISO639Utils(object):
    """Conversion between ISO 639-1 and ISO 639-2/T language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # The order matters for long2short(): where several two-letter codes
    # share a three-letter code, the first one listed is the one returned.
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are significant (e.g. "en-US")
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
4717
4718
class ISO3166Utils(object):
    """Lookup helper from two-letter country codes to country names."""

    # From http://data.okfn.org/data/core/country-list
    # Keys are upper-case ISO 3166-1 alpha-2 codes, values are English names.
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name

        The lookup is case-insensitive (``code`` is upper-cased first).
        Returns None when the code is not present in the table.
        """
        return cls._country_map.get(code.upper())
4977
4978
class GeoUtils(object):
    """Helpers for producing IPv4 addresses that belong to a given country."""

    # Major IPv4 address blocks per country
    # (two-letter code -> one CIDR block in "a.b.c.d/prefix" notation)
    _country_ip_map = {
        'AD': '85.94.160.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '159.117.192.0/21',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '84.112.0.0/13',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AZ': '5.191.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '129.45.128.0/17',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '192.131.134.0/24',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '152.240.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '196.32.200.0/21',
        'CG': '197.214.128.0/17',
        'CH': '85.0.0.0/13',
        'CI': '154.232.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '165.210.0.0/15',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '46.198.0.0/15',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '45.208.0.0/14',
        'GI': '85.115.128.0/19',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '126.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.32.32.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '192.147.231.0/24',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '41.86.0.0/19',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '105.234.0.0/16',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '139.26.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '178.220.0.0/14',
        'RU': '5.136.0.0/13',
        'RW': '105.178.0.0/15',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '154.96.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '152.56.0.0/14',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '197.215.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '197.220.64.0/19',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '93.72.0.0/13',
        'UG': '154.224.0.0/13',
        'US': '3.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '82.215.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '24.92.144.0/20',
        'VE': '186.88.0.0/13',
        'VG': '172.103.64.0/18',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '165.56.0.0/13',
        'ZW': '41.85.192.0/19',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (dotted-quad string) from a block.

        ``code_or_block`` is either a two-character country code, looked up
        case-insensitively in ``_country_ip_map``, or a CIDR block given
        directly as ``"a.b.c.d/prefix"``. Any argument of length 2 is treated
        as a country code. Returns None when the code has no known block.
        """
        if len(code_or_block) == 2:
            cidr = cls._country_ip_map.get(code_or_block.upper())
            if not cidr:
                return None
        else:
            cidr = code_or_block

        network, prefix_bits = cidr.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting every host bit yields the last address of the block
        host_mask = 0xffffffff >> int(prefix_bits)
        highest = lowest | host_mask
        chosen = random.randint(lowest, highest)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
5234
5235
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler whose proxy can be overridden per request.

    A request may carry a ``Ytdl-request-proxy`` header naming the proxy to
    use for that request only; the header is consumed here.
    """

    def __init__(self, proxies=None):
        # Install http_open/https_open that default to "no proxy" before the
        # base-class initialiser runs with the supplied proxies.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Route ``req`` through ``proxy`` unless the request overrides it.

        Returns None (nothing for this handler to do) when no proxy applies
        or when the proxy is a SOCKS one; otherwise defers to the base class.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        socks_schemes = ('socks', 'socks4', 'socks4a', 'socks5')
        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in socks_schemes:
            # Only tag the request here: youtube-dl's http/https handlers
            # wrap the socket with socks themselves
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5259
5260
5261 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5262 # released into Public Domain
5263 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5264
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Encode an integer as a big-endian byte string.

    Values that are zero or negative encode as a single zero byte. When
    blocksize is greater than zero, the result is left-padded with zero
    bytes until its length is a multiple of blocksize.
    """
    remaining = int(n)
    words = []
    # Peel off 32-bit words, least significant first
    while remaining > 0:
        words.append(compat_struct_pack('>I', remaining & 0xffffffff))
        remaining >>= 32
    encoded = b''.join(reversed(words))

    # Drop leading zero bytes, but always keep at least one byte
    encoded = encoded.lstrip(b'\000') or b'\000'

    if blocksize > 0:
        overhang = len(encoded) % blocksize
        if overhang:
            encoded = (blocksize - overhang) * b'\000' + encoded
    return encoded
5293
5294
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Decode a big-endian byte string into an integer.

    This is (essentially) the inverse of long_to_bytes(). An empty input
    decodes to 0.
    """
    overhang = len(s) % 4
    if overhang:
        # Left-pad with zero bytes so the data splits into whole 32-bit words
        s = b'\000' * (4 - overhang) + s

    total = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        total = (total << 32) + word
    return total
5310
5311
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt a single block with OHDave's RSA scheme.
    See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The byte order of the input is reversed before it is read as one
    # big hexadecimal number
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
5327
5328
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout ``[0, 2] + padding + [0] + data`` and is exactly
    ``length`` items long; at least 8 padding bytes are always present.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than ``length - 11``
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be nonzero: the first zero byte after the 0x02
    # marker is the separator, so a zero inside the padding would make the
    # receiver cut the padding short and recover the wrong message.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5342
5343
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer ``num`` as a base-``n`` string.

    ``table`` supplies the digit characters; when omitted (or empty), the
    first ``n`` characters of 0-9, a-z, A-Z are used.

    Raises ValueError if ``n`` exceeds the number of available digits, if
    ``num`` is negative, or if a non-zero ``num`` is requested in a base
    below 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # With floor division a negative number never reaches zero, and neither
    # does any number in base 1, so reject both instead of looping forever.
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small, must be at least 2' % n)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
5360
5361
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into readable source.

    The match yields the packed payload, a base, a symbol count and a
    '|'-separated symbol list. Every word in the payload is replaced by its
    symbol; an empty symbol means the word stands for itself.
    """
    packed_source, radix, total, words = re.search(PACKED_CODES_RE, code).groups()
    radix = int(radix)
    index = int(total)
    words = words.split('|')

    replacements = {}
    while index:
        index -= 1
        token = encode_base_n(index, radix)
        replacements[token] = words[index] or token

    def substitute(match):
        return replacements[match.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, packed_source)
5378
5379
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list (``KEY=value,KEY="quoted"``) into a dict.

    Surrounding double quotes are removed from quoted values. When a key
    occurs more than once, the last occurrence wins.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    parsed = {}
    for name, raw_value in re.findall(attribute_re, attrib):
        parsed[name] = raw_value[1:-1] if raw_value.startswith('"') else raw_value
    return parsed
5387
5388
def urshift(val, n):
    """Shift ``val`` right by ``n`` bits, adding 2**32 to negative values first."""
    if val < 0:
        val += 0x100000000
    return val >> n
5391
5392
5393 # Based on png2str() written by @gdkchan and improved by @yokrysty
5394 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into ``(width, height, pixels)``.

    ``pixels`` is a list of rows; each row is a flat list of ``width * 3``
    byte values. The decoder assumes three bytes per pixel: only the width
    and height are read from IHDR, no other header field is inspected.

    Raises IOError if the signature or IHDR chunk is missing, or if the file
    contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or png_data[12:16] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    # Walk the chunk list: 4-byte length, 4-byte type, payload, 4-byte CRC
    chunks = []
    pos = 8
    total = len(png_data)
    while pos < total:
        size = read_uint(png_data[pos:pos + 4])
        kind = png_data[pos + 4:pos + 8]
        payload = png_data[pos + 8:pos + 8 + size]
        chunks.append((kind, payload))
        pos += 12 + size  # the CRC is skipped, not verified

    ihdr = chunks[0][1]
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(payload for kind, payload in chunks if kind == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is prefixed with one filter-type byte
        row_start = y * (1 + stride)
        filter_type = raw[row_start]

        above = pixels[y - 1] if y > 0 else None
        row = []
        pixels.append(row)

        for x in range(stride):
            color = raw[row_start + 1 + x]
            # Neighbours are the same channel of the pixel to the left and of
            # the pixel above; missing neighbours count as 0
            left = row[x - 3] if x > 2 else 0
            up = above[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                up_left = above[x - 3] if x > 2 and y > 0 else 0

                estimate = left + up - up_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_up_left = abs(estimate - up_left)

                if dist_left <= dist_up and dist_left <= dist_up_left:
                    predictor = left
                elif dist_up <= dist_up_left:
                    predictor = up
                else:
                    predictor = up_left
                color = (color + predictor) & 0xff

            row.append(color)

    return width, height, pixels
5498
5499
def write_xattr(path, key, value):
    """Set the extended attribute ``key`` to ``value`` (bytes) on ``path``.

    Mechanisms are tried in this order: the ``xattr`` Python module (either
    pyxattr or xattr), and only if importing it fails, NTFS Alternate Data
    Streams on Windows or the ``setfattr`` / ``xattr`` command line tools
    elsewhere.

    Raises XAttrMetadataError when writing fails and XAttrUnavailableError
    when no usable mechanism is found (or pyxattr is too old).
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            minimum_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(minimum_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        minimum_version, xattr.__version__))

            set_attribute = xattr.set
        else:  # xattr
            set_attribute = xattr.setxattr

        try:
            set_attribute(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            stream_path = path + ':' + key
            try:
                with open(stream_path, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        has_setfattr = check_executable('setfattr', ['--version'])
        has_xattr_cli = check_executable('xattr', ['-h'])

        if not (has_setfattr or has_xattr_cli):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        # The command line tools take the value as text; setfattr is
        # preferred when both tools are present
        value = value.decode('utf-8')
        if has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = ([encodeFilename(executable, True)]
               + [encodeArgument(o) for o in opts]
               + [encodeFilename(path, True)])

        try:
            proc = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = proc.communicate()
        stderr = stderr.decode('utf-8', 'replace')
        if proc.returncode != 0:
            raise XAttrMetadataError(proc.returncode, stderr)
5582
5583
def random_birthday(year_field, month_field, day_field):
    """Return a random birth date between 1950-01-01 and 1995-12-31.

    The result is a dict that maps the three given field names to the year,
    month and day of the chosen date, each rendered with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    fields = {}
    fields[year_field] = str(birthday.year)
    fields[month_field] = str(birthday.month)
    fields[day_field] = str(birthday.day)
    return fields