2 from __future__
import unicode_literals
22 import xml
.etree
.ElementTree
26 import urllib
.request
as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2
as compat_urllib_request
31 import urllib
.error
as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2
as compat_urllib_error
36 import urllib
.parse
as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib
as compat_urllib_parse
41 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse
import urlparse
as compat_urllib_parse_urlparse
46 import urllib
.parse
as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse
as compat_urlparse
51 import urllib
.response
as compat_urllib_response
52 except ImportError: # Python 2
53 import urllib
as compat_urllib_response
56 import http
.cookiejar
as compat_cookiejar
57 except ImportError: # Python 2
58 import cookielib
as compat_cookiejar
if sys.version_info[0] == 2:
    class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
        """Cookie subclass that accepts text (unicode) names and values on Python 2.

        The name and value are converted to byte strings before being handed
        to the base class; every other argument is forwarded unchanged.
        """

        def __init__(self, version, name, value, *args, **kwargs):
            # Encode each field independently: a text name must be converted
            # even when the value is already a byte string, and vice versa.
            if isinstance(name, compat_str):
                name = name.encode()
            if isinstance(value, compat_str):
                value = value.encode()
            # Explicit base-class call (rather than super()) keeps this valid
            # on Python 2 regardless of whether Cookie is a new-style class.
            compat_cookiejar.Cookie.__init__(
                self, version, name, value, *args, **kwargs)
else:
    # No conversion is applied on other Python versions: plain alias.
    compat_cookiejar_Cookie = compat_cookiejar.Cookie
72 import http
.cookies
as compat_cookies
73 except ImportError: # Python 2
74 import Cookie
as compat_cookies
77 import html
.entities
as compat_html_entities
78 except ImportError: # Python 2
79 import htmlentitydefs
as compat_html_entities
82 compat_html_entities_html5
= compat_html_entities
.html5
83 except AttributeError:
84 # Copied from CPython 3.5.1 html/entities.py
85 compat_html_entities_html5
= {
94 'acE;': '\u223e\u0333',
108 'Afr;': '\U0001d504',
109 'afr;': '\U0001d51e',
114 'alefsym;': '\u2135',
129 'andslope;': '\u2a58',
135 'angmsdaa;': '\u29a8',
136 'angmsdab;': '\u29a9',
137 'angmsdac;': '\u29aa',
138 'angmsdad;': '\u29ab',
139 'angmsdae;': '\u29ac',
140 'angmsdaf;': '\u29ad',
141 'angmsdag;': '\u29ae',
142 'angmsdah;': '\u29af',
144 'angrtvb;': '\u22be',
145 'angrtvbd;': '\u299d',
148 'angzarr;': '\u237c',
151 'Aopf;': '\U0001d538',
152 'aopf;': '\U0001d552',
159 'ApplyFunction;': '\u2061',
161 'approxeq;': '\u224a',
166 'Ascr;': '\U0001d49c',
167 'ascr;': '\U0001d4b6',
171 'asympeq;': '\u224d',
180 'awconint;': '\u2233',
182 'backcong;': '\u224c',
183 'backepsilon;': '\u03f6',
184 'backprime;': '\u2035',
185 'backsim;': '\u223d',
186 'backsimeq;': '\u22cd',
187 'Backslash;': '\u2216',
192 'barwedge;': '\u2305',
194 'bbrktbrk;': '\u23b6',
200 'Because;': '\u2235',
201 'because;': '\u2235',
202 'bemptyv;': '\u29b0',
205 'Bernoullis;': '\u212c',
209 'between;': '\u226c',
210 'Bfr;': '\U0001d505',
211 'bfr;': '\U0001d51f',
213 'bigcirc;': '\u25ef',
215 'bigodot;': '\u2a00',
216 'bigoplus;': '\u2a01',
217 'bigotimes;': '\u2a02',
218 'bigsqcup;': '\u2a06',
219 'bigstar;': '\u2605',
220 'bigtriangledown;': '\u25bd',
221 'bigtriangleup;': '\u25b3',
222 'biguplus;': '\u2a04',
224 'bigwedge;': '\u22c0',
226 'blacklozenge;': '\u29eb',
227 'blacksquare;': '\u25aa',
228 'blacktriangle;': '\u25b4',
229 'blacktriangledown;': '\u25be',
230 'blacktriangleleft;': '\u25c2',
231 'blacktriangleright;': '\u25b8',
238 'bnequiv;': '\u2261\u20e5',
241 'Bopf;': '\U0001d539',
242 'bopf;': '\U0001d553',
265 'boxminus;': '\u229f',
266 'boxplus;': '\u229e',
267 'boxtimes;': '\u22a0',
296 'bscr;': '\U0001d4b7',
302 'bsolhsub;': '\u27c8',
315 'capbrcup;': '\u2a49',
319 'CapitalDifferentialD;': '\u2145',
320 'caps;': '\u2229\ufe00',
323 'Cayleys;': '\u212d',
333 'Cconint;': '\u2230',
335 'ccupssm;': '\u2a50',
341 'cemptyv;': '\u29b2',
344 'CenterDot;': '\xb7',
345 'centerdot;': '\xb7',
347 'cfr;': '\U0001d520',
351 'checkmark;': '\u2713',
357 'circlearrowleft;': '\u21ba',
358 'circlearrowright;': '\u21bb',
359 'circledast;': '\u229b',
360 'circledcirc;': '\u229a',
361 'circleddash;': '\u229d',
362 'CircleDot;': '\u2299',
364 'circledS;': '\u24c8',
365 'CircleMinus;': '\u2296',
366 'CirclePlus;': '\u2295',
367 'CircleTimes;': '\u2297',
370 'cirfnint;': '\u2a10',
372 'cirscir;': '\u29c2',
373 'ClockwiseContourIntegral;': '\u2232',
374 'CloseCurlyDoubleQuote;': '\u201d',
375 'CloseCurlyQuote;': '\u2019',
377 'clubsuit;': '\u2663',
382 'coloneq;': '\u2254',
387 'complement;': '\u2201',
388 'complexes;': '\u2102',
390 'congdot;': '\u2a6d',
391 'Congruent;': '\u2261',
394 'ContourIntegral;': '\u222e',
396 'copf;': '\U0001d554',
398 'Coproduct;': '\u2210',
404 'CounterClockwiseContourIntegral;': '\u2233',
408 'Cscr;': '\U0001d49e',
409 'cscr;': '\U0001d4b8',
415 'cudarrl;': '\u2938',
416 'cudarrr;': '\u2935',
420 'cularrp;': '\u293d',
423 'cupbrcap;': '\u2a48',
429 'cups;': '\u222a\ufe00',
431 'curarrm;': '\u293c',
432 'curlyeqprec;': '\u22de',
433 'curlyeqsucc;': '\u22df',
434 'curlyvee;': '\u22ce',
435 'curlywedge;': '\u22cf',
438 'curvearrowleft;': '\u21b6',
439 'curvearrowright;': '\u21b7',
442 'cwconint;': '\u2232',
454 'dbkarow;': '\u290f',
462 'ddagger;': '\u2021',
464 'DDotrahd;': '\u2911',
465 'ddotseq;': '\u2a77',
471 'demptyv;': '\u29b1',
473 'Dfr;': '\U0001d507',
474 'dfr;': '\U0001d521',
478 'DiacriticalAcute;': '\xb4',
479 'DiacriticalDot;': '\u02d9',
480 'DiacriticalDoubleAcute;': '\u02dd',
481 'DiacriticalGrave;': '`',
482 'DiacriticalTilde;': '\u02dc',
484 'Diamond;': '\u22c4',
485 'diamond;': '\u22c4',
486 'diamondsuit;': '\u2666',
489 'DifferentialD;': '\u2146',
490 'digamma;': '\u03dd',
495 'divideontimes;': '\u22c7',
502 'Dopf;': '\U0001d53b',
503 'dopf;': '\U0001d555',
508 'doteqdot;': '\u2251',
509 'DotEqual;': '\u2250',
510 'dotminus;': '\u2238',
511 'dotplus;': '\u2214',
512 'dotsquare;': '\u22a1',
513 'doublebarwedge;': '\u2306',
514 'DoubleContourIntegral;': '\u222f',
515 'DoubleDot;': '\xa8',
516 'DoubleDownArrow;': '\u21d3',
517 'DoubleLeftArrow;': '\u21d0',
518 'DoubleLeftRightArrow;': '\u21d4',
519 'DoubleLeftTee;': '\u2ae4',
520 'DoubleLongLeftArrow;': '\u27f8',
521 'DoubleLongLeftRightArrow;': '\u27fa',
522 'DoubleLongRightArrow;': '\u27f9',
523 'DoubleRightArrow;': '\u21d2',
524 'DoubleRightTee;': '\u22a8',
525 'DoubleUpArrow;': '\u21d1',
526 'DoubleUpDownArrow;': '\u21d5',
527 'DoubleVerticalBar;': '\u2225',
528 'DownArrow;': '\u2193',
529 'Downarrow;': '\u21d3',
530 'downarrow;': '\u2193',
531 'DownArrowBar;': '\u2913',
532 'DownArrowUpArrow;': '\u21f5',
533 'DownBreve;': '\u0311',
534 'downdownarrows;': '\u21ca',
535 'downharpoonleft;': '\u21c3',
536 'downharpoonright;': '\u21c2',
537 'DownLeftRightVector;': '\u2950',
538 'DownLeftTeeVector;': '\u295e',
539 'DownLeftVector;': '\u21bd',
540 'DownLeftVectorBar;': '\u2956',
541 'DownRightTeeVector;': '\u295f',
542 'DownRightVector;': '\u21c1',
543 'DownRightVectorBar;': '\u2957',
544 'DownTee;': '\u22a4',
545 'DownTeeArrow;': '\u21a7',
546 'drbkarow;': '\u2910',
549 'Dscr;': '\U0001d49f',
550 'dscr;': '\U0001d4b9',
561 'dwangle;': '\u29a6',
564 'dzigrarr;': '\u27ff',
586 'Efr;': '\U0001d508',
587 'efr;': '\U0001d522',
596 'Element;': '\u2208',
597 'elinters;': '\u23e7',
604 'emptyset;': '\u2205',
605 'EmptySmallSquare;': '\u25fb',
607 'EmptyVerySmallSquare;': '\u25ab',
616 'Eopf;': '\U0001d53c',
617 'eopf;': '\U0001d556',
622 'Epsilon;': '\u0395',
623 'epsilon;': '\u03b5',
626 'eqcolon;': '\u2255',
628 'eqslantgtr;': '\u2a96',
629 'eqslantless;': '\u2a95',
632 'EqualTilde;': '\u2242',
634 'Equilibrium;': '\u21cc',
636 'equivDD;': '\u2a78',
637 'eqvparsl;': '\u29e5',
659 'expectation;': '\u2130',
660 'ExponentialE;': '\u2147',
661 'exponentiale;': '\u2147',
662 'fallingdotseq;': '\u2252',
669 'Ffr;': '\U0001d509',
670 'ffr;': '\U0001d523',
672 'FilledSmallSquare;': '\u25fc',
673 'FilledVerySmallSquare;': '\u25aa',
679 'Fopf;': '\U0001d53d',
680 'fopf;': '\U0001d557',
685 'Fouriertrf;': '\u2131',
686 'fpartint;': '\u2a0d',
708 'fscr;': '\U0001d4bb',
730 'geqslant;': '\u2a7e',
734 'gesdoto;': '\u2a82',
735 'gesdotol;': '\u2a84',
736 'gesl;': '\u22db\ufe00',
738 'Gfr;': '\U0001d50a',
739 'gfr;': '\U0001d524',
751 'gnapprox;': '\u2a8a',
757 'Gopf;': '\U0001d53e',
758 'gopf;': '\U0001d558',
760 'GreaterEqual;': '\u2265',
761 'GreaterEqualLess;': '\u22db',
762 'GreaterFullEqual;': '\u2267',
763 'GreaterGreater;': '\u2aa2',
764 'GreaterLess;': '\u2277',
765 'GreaterSlantEqual;': '\u2a7e',
766 'GreaterTilde;': '\u2273',
767 'Gscr;': '\U0001d4a2',
781 'gtquest;': '\u2a7c',
782 'gtrapprox;': '\u2a86',
785 'gtreqless;': '\u22db',
786 'gtreqqless;': '\u2a8c',
787 'gtrless;': '\u2277',
789 'gvertneqq;': '\u2269\ufe00',
790 'gvnE;': '\u2269\ufe00',
799 'harrcir;': '\u2948',
806 'heartsuit;': '\u2665',
810 'hfr;': '\U0001d525',
811 'HilbertSpace;': '\u210b',
812 'hksearow;': '\u2925',
813 'hkswarow;': '\u2926',
816 'hookleftarrow;': '\u21a9',
817 'hookrightarrow;': '\u21aa',
819 'hopf;': '\U0001d559',
821 'HorizontalLine;': '\u2500',
823 'hscr;': '\U0001d4bd',
827 'HumpDownHump;': '\u224e',
828 'HumpEqual;': '\u224f',
849 'ifr;': '\U0001d526',
865 'ImaginaryI;': '\u2148',
866 'imagline;': '\u2110',
867 'imagpart;': '\u2111',
871 'Implies;': '\u21d2',
875 'infintie;': '\u29dd',
880 'integers;': '\u2124',
881 'Integral;': '\u222b',
882 'intercal;': '\u22ba',
883 'Intersection;': '\u22c2',
884 'intlarhk;': '\u2a17',
885 'intprod;': '\u2a3c',
886 'InvisibleComma;': '\u2063',
887 'InvisibleTimes;': '\u2062',
892 'Iopf;': '\U0001d540',
893 'iopf;': '\U0001d55a',
900 'iscr;': '\U0001d4be',
902 'isindot;': '\u22f5',
920 'Jfr;': '\U0001d50d',
921 'jfr;': '\U0001d527',
923 'Jopf;': '\U0001d541',
924 'jopf;': '\U0001d55b',
925 'Jscr;': '\U0001d4a5',
926 'jscr;': '\U0001d4bf',
938 'Kfr;': '\U0001d50e',
939 'kfr;': '\U0001d528',
945 'Kopf;': '\U0001d542',
946 'kopf;': '\U0001d55c',
947 'Kscr;': '\U0001d4a6',
948 'kscr;': '\U0001d4c0',
952 'laemptyv;': '\u29b4',
961 'Laplacetrf;': '\u2112',
968 'larrbfs;': '\u291f',
973 'larrsim;': '\u2973',
979 'lates;': '\u2aad\ufe00',
986 'lbrksld;': '\u298f',
987 'lbrkslu;': '\u298d',
999 'ldrdhar;': '\u2967',
1000 'ldrushar;': '\u294b',
1004 'LeftAngleBracket;': '\u27e8',
1005 'LeftArrow;': '\u2190',
1006 'Leftarrow;': '\u21d0',
1007 'leftarrow;': '\u2190',
1008 'LeftArrowBar;': '\u21e4',
1009 'LeftArrowRightArrow;': '\u21c6',
1010 'leftarrowtail;': '\u21a2',
1011 'LeftCeiling;': '\u2308',
1012 'LeftDoubleBracket;': '\u27e6',
1013 'LeftDownTeeVector;': '\u2961',
1014 'LeftDownVector;': '\u21c3',
1015 'LeftDownVectorBar;': '\u2959',
1016 'LeftFloor;': '\u230a',
1017 'leftharpoondown;': '\u21bd',
1018 'leftharpoonup;': '\u21bc',
1019 'leftleftarrows;': '\u21c7',
1020 'LeftRightArrow;': '\u2194',
1021 'Leftrightarrow;': '\u21d4',
1022 'leftrightarrow;': '\u2194',
1023 'leftrightarrows;': '\u21c6',
1024 'leftrightharpoons;': '\u21cb',
1025 'leftrightsquigarrow;': '\u21ad',
1026 'LeftRightVector;': '\u294e',
1027 'LeftTee;': '\u22a3',
1028 'LeftTeeArrow;': '\u21a4',
1029 'LeftTeeVector;': '\u295a',
1030 'leftthreetimes;': '\u22cb',
1031 'LeftTriangle;': '\u22b2',
1032 'LeftTriangleBar;': '\u29cf',
1033 'LeftTriangleEqual;': '\u22b4',
1034 'LeftUpDownVector;': '\u2951',
1035 'LeftUpTeeVector;': '\u2960',
1036 'LeftUpVector;': '\u21bf',
1037 'LeftUpVectorBar;': '\u2958',
1038 'LeftVector;': '\u21bc',
1039 'LeftVectorBar;': '\u2952',
1044 'leqslant;': '\u2a7d',
1047 'lesdot;': '\u2a7f',
1048 'lesdoto;': '\u2a81',
1049 'lesdotor;': '\u2a83',
1050 'lesg;': '\u22da\ufe00',
1051 'lesges;': '\u2a93',
1052 'lessapprox;': '\u2a85',
1053 'lessdot;': '\u22d6',
1054 'lesseqgtr;': '\u22da',
1055 'lesseqqgtr;': '\u2a8b',
1056 'LessEqualGreater;': '\u22da',
1057 'LessFullEqual;': '\u2266',
1058 'LessGreater;': '\u2276',
1059 'lessgtr;': '\u2276',
1060 'LessLess;': '\u2aa1',
1061 'lesssim;': '\u2272',
1062 'LessSlantEqual;': '\u2a7d',
1063 'LessTilde;': '\u2272',
1064 'lfisht;': '\u297c',
1065 'lfloor;': '\u230a',
1066 'Lfr;': '\U0001d50f',
1067 'lfr;': '\U0001d529',
1073 'lharul;': '\u296a',
1080 'llcorner;': '\u231e',
1081 'Lleftarrow;': '\u21da',
1082 'llhard;': '\u296b',
1084 'Lmidot;': '\u013f',
1085 'lmidot;': '\u0140',
1086 'lmoust;': '\u23b0',
1087 'lmoustache;': '\u23b0',
1089 'lnapprox;': '\u2a89',
1098 'LongLeftArrow;': '\u27f5',
1099 'Longleftarrow;': '\u27f8',
1100 'longleftarrow;': '\u27f5',
1101 'LongLeftRightArrow;': '\u27f7',
1102 'Longleftrightarrow;': '\u27fa',
1103 'longleftrightarrow;': '\u27f7',
1104 'longmapsto;': '\u27fc',
1105 'LongRightArrow;': '\u27f6',
1106 'Longrightarrow;': '\u27f9',
1107 'longrightarrow;': '\u27f6',
1108 'looparrowleft;': '\u21ab',
1109 'looparrowright;': '\u21ac',
1111 'Lopf;': '\U0001d543',
1112 'lopf;': '\U0001d55d',
1113 'loplus;': '\u2a2d',
1114 'lotimes;': '\u2a34',
1115 'lowast;': '\u2217',
1117 'LowerLeftArrow;': '\u2199',
1118 'LowerRightArrow;': '\u2198',
1120 'lozenge;': '\u25ca',
1123 'lparlt;': '\u2993',
1125 'lrcorner;': '\u231f',
1127 'lrhard;': '\u296d',
1130 'lsaquo;': '\u2039',
1132 'lscr;': '\U0001d4c1',
1140 'lsquor;': '\u201a',
1141 'Lstrok;': '\u0141',
1142 'lstrok;': '\u0142',
1151 'lthree;': '\u22cb',
1152 'ltimes;': '\u22c9',
1153 'ltlarr;': '\u2976',
1154 'ltquest;': '\u2a7b',
1158 'ltrPar;': '\u2996',
1159 'lurdshar;': '\u294a',
1160 'luruhar;': '\u2966',
1161 'lvertneqq;': '\u2268\ufe00',
1162 'lvnE;': '\u2268\ufe00',
1167 'maltese;': '\u2720',
1170 'mapsto;': '\u21a6',
1171 'mapstodown;': '\u21a7',
1172 'mapstoleft;': '\u21a4',
1173 'mapstoup;': '\u21a5',
1174 'marker;': '\u25ae',
1175 'mcomma;': '\u2a29',
1180 'measuredangle;': '\u2221',
1181 'MediumSpace;': '\u205f',
1182 'Mellintrf;': '\u2133',
1183 'Mfr;': '\U0001d510',
1184 'mfr;': '\U0001d52a',
1190 'midcir;': '\u2af0',
1194 'minusb;': '\u229f',
1195 'minusd;': '\u2238',
1196 'minusdu;': '\u2a2a',
1197 'MinusPlus;': '\u2213',
1200 'mnplus;': '\u2213',
1201 'models;': '\u22a7',
1202 'Mopf;': '\U0001d544',
1203 'mopf;': '\U0001d55e',
1206 'mscr;': '\U0001d4c2',
1207 'mstpos;': '\u223e',
1210 'multimap;': '\u22b8',
1213 'Nacute;': '\u0143',
1214 'nacute;': '\u0144',
1215 'nang;': '\u2220\u20d2',
1217 'napE;': '\u2a70\u0338',
1218 'napid;': '\u224b\u0338',
1220 'napprox;': '\u2249',
1222 'natural;': '\u266e',
1223 'naturals;': '\u2115',
1226 'nbump;': '\u224e\u0338',
1227 'nbumpe;': '\u224f\u0338',
1229 'Ncaron;': '\u0147',
1230 'ncaron;': '\u0148',
1231 'Ncedil;': '\u0145',
1232 'ncedil;': '\u0146',
1234 'ncongdot;': '\u2a6d\u0338',
1240 'nearhk;': '\u2924',
1243 'nearrow;': '\u2197',
1244 'nedot;': '\u2250\u0338',
1245 'NegativeMediumSpace;': '\u200b',
1246 'NegativeThickSpace;': '\u200b',
1247 'NegativeThinSpace;': '\u200b',
1248 'NegativeVeryThinSpace;': '\u200b',
1249 'nequiv;': '\u2262',
1250 'nesear;': '\u2928',
1251 'nesim;': '\u2242\u0338',
1252 'NestedGreaterGreater;': '\u226b',
1253 'NestedLessLess;': '\u226a',
1255 'nexist;': '\u2204',
1256 'nexists;': '\u2204',
1257 'Nfr;': '\U0001d511',
1258 'nfr;': '\U0001d52b',
1259 'ngE;': '\u2267\u0338',
1262 'ngeqq;': '\u2267\u0338',
1263 'ngeqslant;': '\u2a7e\u0338',
1264 'nges;': '\u2a7e\u0338',
1265 'nGg;': '\u22d9\u0338',
1267 'nGt;': '\u226b\u20d2',
1270 'nGtv;': '\u226b\u0338',
1283 'nlE;': '\u2266\u0338',
1285 'nLeftarrow;': '\u21cd',
1286 'nleftarrow;': '\u219a',
1287 'nLeftrightarrow;': '\u21ce',
1288 'nleftrightarrow;': '\u21ae',
1290 'nleqq;': '\u2266\u0338',
1291 'nleqslant;': '\u2a7d\u0338',
1292 'nles;': '\u2a7d\u0338',
1294 'nLl;': '\u22d8\u0338',
1296 'nLt;': '\u226a\u20d2',
1299 'nltrie;': '\u22ec',
1300 'nLtv;': '\u226a\u0338',
1302 'NoBreak;': '\u2060',
1303 'NonBreakingSpace;': '\xa0',
1305 'nopf;': '\U0001d55f',
1309 'NotCongruent;': '\u2262',
1310 'NotCupCap;': '\u226d',
1311 'NotDoubleVerticalBar;': '\u2226',
1312 'NotElement;': '\u2209',
1313 'NotEqual;': '\u2260',
1314 'NotEqualTilde;': '\u2242\u0338',
1315 'NotExists;': '\u2204',
1316 'NotGreater;': '\u226f',
1317 'NotGreaterEqual;': '\u2271',
1318 'NotGreaterFullEqual;': '\u2267\u0338',
1319 'NotGreaterGreater;': '\u226b\u0338',
1320 'NotGreaterLess;': '\u2279',
1321 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1322 'NotGreaterTilde;': '\u2275',
1323 'NotHumpDownHump;': '\u224e\u0338',
1324 'NotHumpEqual;': '\u224f\u0338',
1326 'notindot;': '\u22f5\u0338',
1327 'notinE;': '\u22f9\u0338',
1328 'notinva;': '\u2209',
1329 'notinvb;': '\u22f7',
1330 'notinvc;': '\u22f6',
1331 'NotLeftTriangle;': '\u22ea',
1332 'NotLeftTriangleBar;': '\u29cf\u0338',
1333 'NotLeftTriangleEqual;': '\u22ec',
1334 'NotLess;': '\u226e',
1335 'NotLessEqual;': '\u2270',
1336 'NotLessGreater;': '\u2278',
1337 'NotLessLess;': '\u226a\u0338',
1338 'NotLessSlantEqual;': '\u2a7d\u0338',
1339 'NotLessTilde;': '\u2274',
1340 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1341 'NotNestedLessLess;': '\u2aa1\u0338',
1343 'notniva;': '\u220c',
1344 'notnivb;': '\u22fe',
1345 'notnivc;': '\u22fd',
1346 'NotPrecedes;': '\u2280',
1347 'NotPrecedesEqual;': '\u2aaf\u0338',
1348 'NotPrecedesSlantEqual;': '\u22e0',
1349 'NotReverseElement;': '\u220c',
1350 'NotRightTriangle;': '\u22eb',
1351 'NotRightTriangleBar;': '\u29d0\u0338',
1352 'NotRightTriangleEqual;': '\u22ed',
1353 'NotSquareSubset;': '\u228f\u0338',
1354 'NotSquareSubsetEqual;': '\u22e2',
1355 'NotSquareSuperset;': '\u2290\u0338',
1356 'NotSquareSupersetEqual;': '\u22e3',
1357 'NotSubset;': '\u2282\u20d2',
1358 'NotSubsetEqual;': '\u2288',
1359 'NotSucceeds;': '\u2281',
1360 'NotSucceedsEqual;': '\u2ab0\u0338',
1361 'NotSucceedsSlantEqual;': '\u22e1',
1362 'NotSucceedsTilde;': '\u227f\u0338',
1363 'NotSuperset;': '\u2283\u20d2',
1364 'NotSupersetEqual;': '\u2289',
1365 'NotTilde;': '\u2241',
1366 'NotTildeEqual;': '\u2244',
1367 'NotTildeFullEqual;': '\u2247',
1368 'NotTildeTilde;': '\u2249',
1369 'NotVerticalBar;': '\u2224',
1371 'nparallel;': '\u2226',
1372 'nparsl;': '\u2afd\u20e5',
1373 'npart;': '\u2202\u0338',
1374 'npolint;': '\u2a14',
1376 'nprcue;': '\u22e0',
1377 'npre;': '\u2aaf\u0338',
1379 'npreceq;': '\u2aaf\u0338',
1382 'nrarrc;': '\u2933\u0338',
1383 'nrarrw;': '\u219d\u0338',
1384 'nRightarrow;': '\u21cf',
1385 'nrightarrow;': '\u219b',
1387 'nrtrie;': '\u22ed',
1389 'nsccue;': '\u22e1',
1390 'nsce;': '\u2ab0\u0338',
1391 'Nscr;': '\U0001d4a9',
1392 'nscr;': '\U0001d4c3',
1393 'nshortmid;': '\u2224',
1394 'nshortparallel;': '\u2226',
1397 'nsimeq;': '\u2244',
1400 'nsqsube;': '\u22e2',
1401 'nsqsupe;': '\u22e3',
1403 'nsubE;': '\u2ac5\u0338',
1405 'nsubset;': '\u2282\u20d2',
1406 'nsubseteq;': '\u2288',
1407 'nsubseteqq;': '\u2ac5\u0338',
1409 'nsucceq;': '\u2ab0\u0338',
1411 'nsupE;': '\u2ac6\u0338',
1413 'nsupset;': '\u2283\u20d2',
1414 'nsupseteq;': '\u2289',
1415 'nsupseteqq;': '\u2ac6\u0338',
1422 'ntriangleleft;': '\u22ea',
1423 'ntrianglelefteq;': '\u22ec',
1424 'ntriangleright;': '\u22eb',
1425 'ntrianglerighteq;': '\u22ed',
1429 'numero;': '\u2116',
1431 'nvap;': '\u224d\u20d2',
1432 'nVDash;': '\u22af',
1433 'nVdash;': '\u22ae',
1434 'nvDash;': '\u22ad',
1435 'nvdash;': '\u22ac',
1436 'nvge;': '\u2265\u20d2',
1438 'nvHarr;': '\u2904',
1439 'nvinfin;': '\u29de',
1440 'nvlArr;': '\u2902',
1441 'nvle;': '\u2264\u20d2',
1443 'nvltrie;': '\u22b4\u20d2',
1444 'nvrArr;': '\u2903',
1445 'nvrtrie;': '\u22b5\u20d2',
1446 'nvsim;': '\u223c\u20d2',
1447 'nwarhk;': '\u2923',
1450 'nwarrow;': '\u2196',
1451 'nwnear;': '\u2927',
1465 'Odblac;': '\u0150',
1466 'odblac;': '\u0151',
1469 'odsold;': '\u29bc',
1473 'Ofr;': '\U0001d512',
1474 'ofr;': '\U0001d52c',
1486 'olcross;': '\u29bb',
1493 'Omicron;': '\u039f',
1494 'omicron;': '\u03bf',
1496 'ominus;': '\u2296',
1497 'Oopf;': '\U0001d546',
1498 'oopf;': '\U0001d560',
1500 'OpenCurlyDoubleQuote;': '\u201c',
1501 'OpenCurlyQuote;': '\u2018',
1509 'orderof;': '\u2134',
1514 'origof;': '\u22b6',
1516 'orslope;': '\u2a57',
1519 'Oscr;': '\U0001d4aa',
1530 'Otimes;': '\u2a37',
1531 'otimes;': '\u2297',
1532 'otimesas;': '\u2a36',
1538 'OverBar;': '\u203e',
1539 'OverBrace;': '\u23de',
1540 'OverBracket;': '\u23b4',
1541 'OverParenthesis;': '\u23dc',
1545 'parallel;': '\u2225',
1546 'parsim;': '\u2af3',
1549 'PartialD;': '\u2202',
1554 'permil;': '\u2030',
1556 'pertenk;': '\u2031',
1557 'Pfr;': '\U0001d513',
1558 'pfr;': '\U0001d52d',
1562 'phmmat;': '\u2133',
1566 'pitchfork;': '\u22d4',
1568 'planck;': '\u210f',
1569 'planckh;': '\u210e',
1570 'plankv;': '\u210f',
1572 'plusacir;': '\u2a23',
1574 'pluscir;': '\u2a22',
1575 'plusdo;': '\u2214',
1576 'plusdu;': '\u2a25',
1578 'PlusMinus;': '\xb1',
1581 'plussim;': '\u2a26',
1582 'plustwo;': '\u2a27',
1584 'Poincareplane;': '\u210c',
1585 'pointint;': '\u2a15',
1587 'popf;': '\U0001d561',
1597 'precapprox;': '\u2ab7',
1598 'preccurlyeq;': '\u227c',
1599 'Precedes;': '\u227a',
1600 'PrecedesEqual;': '\u2aaf',
1601 'PrecedesSlantEqual;': '\u227c',
1602 'PrecedesTilde;': '\u227e',
1603 'preceq;': '\u2aaf',
1604 'precnapprox;': '\u2ab9',
1605 'precneqq;': '\u2ab5',
1606 'precnsim;': '\u22e8',
1607 'precsim;': '\u227e',
1610 'primes;': '\u2119',
1613 'prnsim;': '\u22e8',
1615 'Product;': '\u220f',
1616 'profalar;': '\u232e',
1617 'profline;': '\u2312',
1618 'profsurf;': '\u2313',
1620 'Proportion;': '\u2237',
1621 'Proportional;': '\u221d',
1622 'propto;': '\u221d',
1624 'prurel;': '\u22b0',
1625 'Pscr;': '\U0001d4ab',
1626 'pscr;': '\U0001d4c5',
1629 'puncsp;': '\u2008',
1630 'Qfr;': '\U0001d514',
1631 'qfr;': '\U0001d52e',
1634 'qopf;': '\U0001d562',
1635 'qprime;': '\u2057',
1636 'Qscr;': '\U0001d4ac',
1637 'qscr;': '\U0001d4c6',
1638 'quaternions;': '\u210d',
1639 'quatint;': '\u2a16',
1641 'questeq;': '\u225f',
1647 'race;': '\u223d\u0331',
1648 'Racute;': '\u0154',
1649 'racute;': '\u0155',
1651 'raemptyv;': '\u29b3',
1656 'rangle;': '\u27e9',
1662 'rarrap;': '\u2975',
1664 'rarrbfs;': '\u2920',
1666 'rarrfs;': '\u291e',
1667 'rarrhk;': '\u21aa',
1668 'rarrlp;': '\u21ac',
1669 'rarrpl;': '\u2945',
1670 'rarrsim;': '\u2974',
1671 'Rarrtl;': '\u2916',
1672 'rarrtl;': '\u21a3',
1674 'rAtail;': '\u291c',
1675 'ratail;': '\u291a',
1677 'rationals;': '\u211a',
1685 'rbrksld;': '\u298e',
1686 'rbrkslu;': '\u2990',
1687 'Rcaron;': '\u0158',
1688 'rcaron;': '\u0159',
1689 'Rcedil;': '\u0156',
1690 'rcedil;': '\u0157',
1696 'rdldhar;': '\u2969',
1698 'rdquor;': '\u201d',
1702 'realine;': '\u211b',
1703 'realpart;': '\u211c',
1710 'ReverseElement;': '\u220b',
1711 'ReverseEquilibrium;': '\u21cb',
1712 'ReverseUpEquilibrium;': '\u296f',
1713 'rfisht;': '\u297d',
1714 'rfloor;': '\u230b',
1716 'rfr;': '\U0001d52f',
1720 'rharul;': '\u296c',
1724 'RightAngleBracket;': '\u27e9',
1725 'RightArrow;': '\u2192',
1726 'Rightarrow;': '\u21d2',
1727 'rightarrow;': '\u2192',
1728 'RightArrowBar;': '\u21e5',
1729 'RightArrowLeftArrow;': '\u21c4',
1730 'rightarrowtail;': '\u21a3',
1731 'RightCeiling;': '\u2309',
1732 'RightDoubleBracket;': '\u27e7',
1733 'RightDownTeeVector;': '\u295d',
1734 'RightDownVector;': '\u21c2',
1735 'RightDownVectorBar;': '\u2955',
1736 'RightFloor;': '\u230b',
1737 'rightharpoondown;': '\u21c1',
1738 'rightharpoonup;': '\u21c0',
1739 'rightleftarrows;': '\u21c4',
1740 'rightleftharpoons;': '\u21cc',
1741 'rightrightarrows;': '\u21c9',
1742 'rightsquigarrow;': '\u219d',
1743 'RightTee;': '\u22a2',
1744 'RightTeeArrow;': '\u21a6',
1745 'RightTeeVector;': '\u295b',
1746 'rightthreetimes;': '\u22cc',
1747 'RightTriangle;': '\u22b3',
1748 'RightTriangleBar;': '\u29d0',
1749 'RightTriangleEqual;': '\u22b5',
1750 'RightUpDownVector;': '\u294f',
1751 'RightUpTeeVector;': '\u295c',
1752 'RightUpVector;': '\u21be',
1753 'RightUpVectorBar;': '\u2954',
1754 'RightVector;': '\u21c0',
1755 'RightVectorBar;': '\u2953',
1757 'risingdotseq;': '\u2253',
1761 'rmoust;': '\u23b1',
1762 'rmoustache;': '\u23b1',
1769 'ropf;': '\U0001d563',
1770 'roplus;': '\u2a2e',
1771 'rotimes;': '\u2a35',
1772 'RoundImplies;': '\u2970',
1774 'rpargt;': '\u2994',
1775 'rppolint;': '\u2a12',
1777 'Rrightarrow;': '\u21db',
1778 'rsaquo;': '\u203a',
1780 'rscr;': '\U0001d4c7',
1785 'rsquor;': '\u2019',
1786 'rthree;': '\u22cc',
1787 'rtimes;': '\u22ca',
1791 'rtriltri;': '\u29ce',
1792 'RuleDelayed;': '\u29f4',
1793 'ruluhar;': '\u2968',
1795 'Sacute;': '\u015a',
1796 'sacute;': '\u015b',
1801 'Scaron;': '\u0160',
1802 'scaron;': '\u0161',
1806 'Scedil;': '\u015e',
1807 'scedil;': '\u015f',
1812 'scnsim;': '\u22e9',
1813 'scpolint;': '\u2a13',
1820 'searhk;': '\u2925',
1823 'searrow;': '\u2198',
1827 'seswar;': '\u2929',
1828 'setminus;': '\u2216',
1831 'Sfr;': '\U0001d516',
1832 'sfr;': '\U0001d530',
1833 'sfrown;': '\u2322',
1835 'SHCHcy;': '\u0429',
1836 'shchcy;': '\u0449',
1839 'ShortDownArrow;': '\u2193',
1840 'ShortLeftArrow;': '\u2190',
1841 'shortmid;': '\u2223',
1842 'shortparallel;': '\u2225',
1843 'ShortRightArrow;': '\u2192',
1844 'ShortUpArrow;': '\u2191',
1849 'sigmaf;': '\u03c2',
1850 'sigmav;': '\u03c2',
1852 'simdot;': '\u2a6a',
1860 'simplus;': '\u2a24',
1861 'simrarr;': '\u2972',
1863 'SmallCircle;': '\u2218',
1864 'smallsetminus;': '\u2216',
1865 'smashp;': '\u2a33',
1866 'smeparsl;': '\u29e4',
1871 'smtes;': '\u2aac\ufe00',
1872 'SOFTcy;': '\u042c',
1873 'softcy;': '\u044c',
1876 'solbar;': '\u233f',
1877 'Sopf;': '\U0001d54a',
1878 'sopf;': '\U0001d564',
1879 'spades;': '\u2660',
1880 'spadesuit;': '\u2660',
1883 'sqcaps;': '\u2293\ufe00',
1885 'sqcups;': '\u2294\ufe00',
1888 'sqsube;': '\u2291',
1889 'sqsubset;': '\u228f',
1890 'sqsubseteq;': '\u2291',
1892 'sqsupe;': '\u2292',
1893 'sqsupset;': '\u2290',
1894 'sqsupseteq;': '\u2292',
1896 'Square;': '\u25a1',
1897 'square;': '\u25a1',
1898 'SquareIntersection;': '\u2293',
1899 'SquareSubset;': '\u228f',
1900 'SquareSubsetEqual;': '\u2291',
1901 'SquareSuperset;': '\u2290',
1902 'SquareSupersetEqual;': '\u2292',
1903 'SquareUnion;': '\u2294',
1904 'squarf;': '\u25aa',
1907 'Sscr;': '\U0001d4ae',
1908 'sscr;': '\U0001d4c8',
1909 'ssetmn;': '\u2216',
1910 'ssmile;': '\u2323',
1911 'sstarf;': '\u22c6',
1915 'straightepsilon;': '\u03f5',
1916 'straightphi;': '\u03d5',
1920 'subdot;': '\u2abd',
1923 'subedot;': '\u2ac3',
1924 'submult;': '\u2ac1',
1927 'subplus;': '\u2abf',
1928 'subrarr;': '\u2979',
1929 'Subset;': '\u22d0',
1930 'subset;': '\u2282',
1931 'subseteq;': '\u2286',
1932 'subseteqq;': '\u2ac5',
1933 'SubsetEqual;': '\u2286',
1934 'subsetneq;': '\u228a',
1935 'subsetneqq;': '\u2acb',
1936 'subsim;': '\u2ac7',
1937 'subsub;': '\u2ad5',
1938 'subsup;': '\u2ad3',
1940 'succapprox;': '\u2ab8',
1941 'succcurlyeq;': '\u227d',
1942 'Succeeds;': '\u227b',
1943 'SucceedsEqual;': '\u2ab0',
1944 'SucceedsSlantEqual;': '\u227d',
1945 'SucceedsTilde;': '\u227f',
1946 'succeq;': '\u2ab0',
1947 'succnapprox;': '\u2aba',
1948 'succneqq;': '\u2ab6',
1949 'succnsim;': '\u22e9',
1950 'succsim;': '\u227f',
1951 'SuchThat;': '\u220b',
1963 'supdot;': '\u2abe',
1964 'supdsub;': '\u2ad8',
1967 'supedot;': '\u2ac4',
1968 'Superset;': '\u2283',
1969 'SupersetEqual;': '\u2287',
1970 'suphsol;': '\u27c9',
1971 'suphsub;': '\u2ad7',
1972 'suplarr;': '\u297b',
1973 'supmult;': '\u2ac2',
1976 'supplus;': '\u2ac0',
1977 'Supset;': '\u22d1',
1978 'supset;': '\u2283',
1979 'supseteq;': '\u2287',
1980 'supseteqq;': '\u2ac6',
1981 'supsetneq;': '\u228b',
1982 'supsetneqq;': '\u2acc',
1983 'supsim;': '\u2ac8',
1984 'supsub;': '\u2ad4',
1985 'supsup;': '\u2ad6',
1986 'swarhk;': '\u2926',
1989 'swarrow;': '\u2199',
1990 'swnwar;': '\u292a',
1994 'target;': '\u2316',
1998 'Tcaron;': '\u0164',
1999 'tcaron;': '\u0165',
2000 'Tcedil;': '\u0162',
2001 'tcedil;': '\u0163',
2005 'telrec;': '\u2315',
2006 'Tfr;': '\U0001d517',
2007 'tfr;': '\U0001d531',
2008 'there4;': '\u2234',
2009 'Therefore;': '\u2234',
2010 'therefore;': '\u2234',
2013 'thetasym;': '\u03d1',
2014 'thetav;': '\u03d1',
2015 'thickapprox;': '\u2248',
2016 'thicksim;': '\u223c',
2017 'ThickSpace;': '\u205f\u200a',
2018 'thinsp;': '\u2009',
2019 'ThinSpace;': '\u2009',
2021 'thksim;': '\u223c',
2028 'TildeEqual;': '\u2243',
2029 'TildeFullEqual;': '\u2245',
2030 'TildeTilde;': '\u2248',
2033 'timesb;': '\u22a0',
2034 'timesbar;': '\u2a31',
2035 'timesd;': '\u2a30',
2039 'topbot;': '\u2336',
2040 'topcir;': '\u2af1',
2041 'Topf;': '\U0001d54b',
2042 'topf;': '\U0001d565',
2043 'topfork;': '\u2ada',
2045 'tprime;': '\u2034',
2048 'triangle;': '\u25b5',
2049 'triangledown;': '\u25bf',
2050 'triangleleft;': '\u25c3',
2051 'trianglelefteq;': '\u22b4',
2052 'triangleq;': '\u225c',
2053 'triangleright;': '\u25b9',
2054 'trianglerighteq;': '\u22b5',
2055 'tridot;': '\u25ec',
2057 'triminus;': '\u2a3a',
2058 'TripleDot;': '\u20db',
2059 'triplus;': '\u2a39',
2061 'tritime;': '\u2a3b',
2062 'trpezium;': '\u23e2',
2063 'Tscr;': '\U0001d4af',
2064 'tscr;': '\U0001d4c9',
2069 'Tstrok;': '\u0166',
2070 'tstrok;': '\u0167',
2072 'twoheadleftarrow;': '\u219e',
2073 'twoheadrightarrow;': '\u21a0',
2081 'Uarrocir;': '\u2949',
2084 'Ubreve;': '\u016c',
2085 'ubreve;': '\u016d',
2093 'Udblac;': '\u0170',
2094 'udblac;': '\u0171',
2096 'ufisht;': '\u297e',
2097 'Ufr;': '\U0001d518',
2098 'ufr;': '\U0001d532',
2107 'ulcorn;': '\u231c',
2108 'ulcorner;': '\u231c',
2109 'ulcrop;': '\u230f',
2116 'UnderBrace;': '\u23df',
2117 'UnderBracket;': '\u23b5',
2118 'UnderParenthesis;': '\u23dd',
2120 'UnionPlus;': '\u228e',
2123 'Uopf;': '\U0001d54c',
2124 'uopf;': '\U0001d566',
2125 'UpArrow;': '\u2191',
2126 'Uparrow;': '\u21d1',
2127 'uparrow;': '\u2191',
2128 'UpArrowBar;': '\u2912',
2129 'UpArrowDownArrow;': '\u21c5',
2130 'UpDownArrow;': '\u2195',
2131 'Updownarrow;': '\u21d5',
2132 'updownarrow;': '\u2195',
2133 'UpEquilibrium;': '\u296e',
2134 'upharpoonleft;': '\u21bf',
2135 'upharpoonright;': '\u21be',
2137 'UpperLeftArrow;': '\u2196',
2138 'UpperRightArrow;': '\u2197',
2142 'Upsilon;': '\u03a5',
2143 'upsilon;': '\u03c5',
2145 'UpTeeArrow;': '\u21a5',
2146 'upuparrows;': '\u21c8',
2147 'urcorn;': '\u231d',
2148 'urcorner;': '\u231d',
2149 'urcrop;': '\u230e',
2153 'Uscr;': '\U0001d4b0',
2154 'uscr;': '\U0001d4ca',
2156 'Utilde;': '\u0168',
2157 'utilde;': '\u0169',
2165 'uwangle;': '\u29a7',
2166 'vangrt;': '\u299c',
2167 'varepsilon;': '\u03f5',
2168 'varkappa;': '\u03f0',
2169 'varnothing;': '\u2205',
2170 'varphi;': '\u03d5',
2172 'varpropto;': '\u221d',
2175 'varrho;': '\u03f1',
2176 'varsigma;': '\u03c2',
2177 'varsubsetneq;': '\u228a\ufe00',
2178 'varsubsetneqq;': '\u2acb\ufe00',
2179 'varsupsetneq;': '\u228b\ufe00',
2180 'varsupsetneqq;': '\u2acc\ufe00',
2181 'vartheta;': '\u03d1',
2182 'vartriangleleft;': '\u22b2',
2183 'vartriangleright;': '\u22b3',
2193 'Vdashl;': '\u2ae6',
2196 'veebar;': '\u22bb',
2198 'vellip;': '\u22ee',
2199 'Verbar;': '\u2016',
2203 'VerticalBar;': '\u2223',
2204 'VerticalLine;': '|',
2205 'VerticalSeparator;': '\u2758',
2206 'VerticalTilde;': '\u2240',
2207 'VeryThinSpace;': '\u200a',
2208 'Vfr;': '\U0001d519',
2209 'vfr;': '\U0001d533',
2211 'vnsub;': '\u2282\u20d2',
2212 'vnsup;': '\u2283\u20d2',
2213 'Vopf;': '\U0001d54d',
2214 'vopf;': '\U0001d567',
2217 'Vscr;': '\U0001d4b1',
2218 'vscr;': '\U0001d4cb',
2219 'vsubnE;': '\u2acb\ufe00',
2220 'vsubne;': '\u228a\ufe00',
2221 'vsupnE;': '\u2acc\ufe00',
2222 'vsupne;': '\u228b\ufe00',
2223 'Vvdash;': '\u22aa',
2224 'vzigzag;': '\u299a',
2227 'wedbar;': '\u2a5f',
2230 'wedgeq;': '\u2259',
2231 'weierp;': '\u2118',
2232 'Wfr;': '\U0001d51a',
2233 'wfr;': '\U0001d534',
2234 'Wopf;': '\U0001d54e',
2235 'wopf;': '\U0001d568',
2238 'wreath;': '\u2240',
2239 'Wscr;': '\U0001d4b2',
2240 'wscr;': '\U0001d4cc',
2245 'Xfr;': '\U0001d51b',
2246 'xfr;': '\U0001d535',
2256 'Xopf;': '\U0001d54f',
2257 'xopf;': '\U0001d569',
2258 'xoplus;': '\u2a01',
2259 'xotime;': '\u2a02',
2262 'Xscr;': '\U0001d4b3',
2263 'xscr;': '\U0001d4cd',
2264 'xsqcup;': '\u2a06',
2265 'xuplus;': '\u2a04',
2268 'xwedge;': '\u22c0',
2281 'Yfr;': '\U0001d51c',
2282 'yfr;': '\U0001d536',
2285 'Yopf;': '\U0001d550',
2286 'yopf;': '\U0001d56a',
2287 'Yscr;': '\U0001d4b4',
2288 'yscr;': '\U0001d4ce',
2294 'Zacute;': '\u0179',
2295 'zacute;': '\u017a',
2296 'Zcaron;': '\u017d',
2297 'zcaron;': '\u017e',
2302 'zeetrf;': '\u2128',
2303 'ZeroWidthSpace;': '\u200b',
2307 'zfr;': '\U0001d537',
2310 'zigrarr;': '\u21dd',
2312 'zopf;': '\U0001d56b',
2313 'Zscr;': '\U0001d4b5',
2314 'zscr;': '\U0001d4cf',
2320 import http
.client
as compat_http_client
2321 except ImportError: # Python 2
2322 import httplib
as compat_http_client
2325 from urllib
.error
import HTTPError
as compat_HTTPError
2326 except ImportError: # Python 2
2327 from urllib2
import HTTPError
as compat_HTTPError
2330 from urllib
.request
import urlretrieve
as compat_urlretrieve
2331 except ImportError: # Python 2
2332 from urllib
import urlretrieve
as compat_urlretrieve
2335 from html
.parser
import HTMLParser
as compat_HTMLParser
2336 except ImportError: # Python 2
2337 from HTMLParser
import HTMLParser
as compat_HTMLParser
2340 from HTMLParser
import HTMLParseError
as compat_HTMLParseError
2341 except ImportError: # Python <3.4
2343 from html
.parser
import HTMLParseError
as compat_HTMLParseError
2344 except ImportError: # Python >3.4
2346 # HTMLParseError has been deprecated in Python 3.3 and removed in
2347 # Python 3.5. Introducing a dummy exception for Python >3.5 for compatible
2348 # and uniform cross-version exception handling
2349 class compat_HTMLParseError(Exception):
2353 from subprocess
import DEVNULL
2354 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2356 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2359 import http
.server
as compat_http_server
2361 import BaseHTTPServer
as compat_http_server
2364 compat_str
= unicode # Python 2
2369 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2370 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2371 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2372 except ImportError: # Python 2
2373 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2374 else re
.compile(r
'([\x00-\x7f]+)'))
2376 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2377 # implementations from cpython 3.4.3's stdlib. Python 2's version
2378 # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
2380 def compat_urllib_parse_unquote_to_bytes(string
):
2381 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2382 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2383 # unescaped non-ASCII characters, which URIs should not.
2385 # Is it a string-like object?
2388 if isinstance(string
, compat_str
):
2389 string
= string
.encode('utf-8')
2390 bits
= string
.split(b
'%')
2395 for item
in bits
[1:]:
2397 append(compat_urllib_parse
._hextochr
[item
[:2]])
2402 return b
''.join(res
)
2404 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2405 """Replace %xx escapes by their single-character equivalent. The optional
2406 encoding and errors parameters specify how to decode percent-encoded
2407 sequences into Unicode characters, as accepted by the bytes.decode()
2409 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2410 sequences are replaced by a placeholder character.
2412 unquote('abc%20def') -> 'abc def'.
2414 if '%' not in string
:
2417 if encoding
is None:
2421 bits
= _asciire
.split(string
)
2424 for i
in range(1, len(bits
), 2):
2425 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Unquote *string* the way HTML form values require.

    Works like compat_urllib_parse_unquote(), except that every plus sign
    is turned into a space before the percent-escapes are decoded.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Pluses are translated first, so an escaped plus (%2B) survives as '+'.
    return compat_urllib_parse_unquote(
        string.replace('+', ' '), encoding, errors)
2439 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2440 except ImportError: # Python 2
2441 # Python 2 will choke in urlencode on a mixture of byte and unicode strings.
2442 # Possible solutions are to either port it from Python 3 with all
2443 # its helpers or manually ensure the input query contains only byte strings.
2444 # We stick with the latter, recursively encoding the whole query.
2445 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2447 if isinstance(e
, dict):
2449 elif isinstance(e
, (list, tuple,)):
2450 list_e
= encode_list(e
)
2451 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2452 elif isinstance(e
, compat_str
):
2453 e
= e
.encode(encoding
)
2457 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2460 return [encode_elem(e
) for e
in l
]
2462 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2465 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2466 except ImportError: # Python < 3.4
2467 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2468 class compat_urllib_request_DataHandler(compat_urllib_request
.BaseHandler
):
2469 def data_open(self
, req
):
2470 # data URLs as specified in RFC 2397.
2472 # ignores POSTed data
2475 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2476 # mediatype := [ type "/" subtype ] *( ";" parameter )
2478 # parameter := attribute "=" value
2479 url
= req
.get_full_url()
2481 scheme
, data
= url
.split(':', 1)
2482 mediatype
, data
= data
.split(',', 1)
2484 # even base64 encoded data URLs might be quoted so unquote in any case:
2485 data
= compat_urllib_parse_unquote_to_bytes(data
)
2486 if mediatype
.endswith(';base64'):
2487 data
= binascii
.a2b_base64(data
)
2488 mediatype
= mediatype
[:-7]
2491 mediatype
= 'text/plain;charset=US-ASCII'
2493 headers
= email
.message_from_string(
2494 'Content-type: %s\nContent-length: %d\n' % (mediatype
, len(data
)))
2496 return compat_urllib_response
.addinfourl(io
.BytesIO(data
), headers
, url
)
2499 compat_basestring
= basestring
# Python 2
2501 compat_basestring
= str
2504 compat_chr
= unichr # Python 2
2509 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2510 except ImportError: # Python 2.6
2511 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2514 etree
= xml
.etree
.ElementTree
2517 class _TreeBuilder(etree
.TreeBuilder
):
2518 def doctype(self
, name
, pubid
, system
):
2523 # xml.etree.ElementTree.Element is a method in Python <=2.6 and
2524 # the following will crash with:
2525 # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
2526 isinstance(None, xml
.etree
.ElementTree
.Element
)
2527 from xml
.etree
.ElementTree
import Element
as compat_etree_Element
2528 except TypeError: # Python <=2.6
2529 from xml
.etree
.ElementTree
import _ElementInterface
as compat_etree_Element
2531 if sys
.version_info
[0] >= 3:
def compat_etree_fromstring(text):
    """Parse the XML document *text* and return what etree.XML() yields.

    The parser is an etree.XMLParser whose target is a fresh _TreeBuilder
    instance.
    """
    builder = _TreeBuilder()
    xml_parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=xml_parser)
2535 # python 2.x tries to encode unicode strings with ascii (see the
2536 # XMLParser._fixtext method)
2538 _etree_iter
= etree
.Element
.iter
2539 except AttributeError: # Python <=2.6
2540 def _etree_iter(root
):
2541 for el
in root
.findall('*'):
2543 for sub
in _etree_iter(el
):
2546 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2548 def _XML(text
, parser
=None):
2550 parser
= etree
.XMLParser(target
=_TreeBuilder())
2552 return parser
.close()
2554 def _element_factory(*args
, **kwargs
):
2555 el
= etree
.Element(*args
, **kwargs
)
2556 for k
, v
in el
.items():
2557 if isinstance(v
, bytes):
2558 el
.set(k
, v
.decode('utf-8'))
2561 def compat_etree_fromstring(text
):
2562 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2563 for el
in _etree_iter(doc
):
2564 if el
.text
is not None and isinstance(el
.text
, bytes):
2565 el
.text
= el
.text
.decode('utf-8')
2568 if hasattr(etree
, 'register_namespace'):
2569 compat_etree_register_namespace
= etree
.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Register the namespace *prefix* for the namespace *uri*.

    The registry (etree._namespace_map) is global. Any entry already using
    either the given prefix or the given URI is dropped before the new
    mapping is stored. Tags and attributes in this namespace will be
    serialized with the prefix if possible.

    Raises ValueError if the prefix has the reserved "ns<digits>" form.
    """
    if re.match(r"ns\d+$", prefix) is not None:
        raise ValueError("Prefix format reserved for internal use")
    registry = etree._namespace_map
    # Collect the stale keys first so the dict is not mutated while iterated.
    stale_uris = [
        known_uri for known_uri, known_prefix in registry.items()
        if known_uri == uri or known_prefix == prefix]
    for stale_uri in stale_uris:
        del registry[stale_uri]
    registry[uri] = prefix
2586 if sys
.version_info
< (2, 7):
2587 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2588 # .//node does not match if a node is a direct child of . !
2589 def compat_xpath(xpath
):
2590 if isinstance(xpath
, compat_str
):
2591 xpath
= xpath
.encode('ascii')
2594 compat_xpath
= lambda xpath
: xpath
2597 from urllib
.parse
import parse_qs
as compat_parse_qs
2598 except ImportError: # Python 2
2599 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2600 # Python 2's version is apparently totally broken
2602 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2603 encoding
='utf-8', errors
='replace'):
2604 qs
, _coerce_result
= qs
, compat_str
2605 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2607 for name_value
in pairs
:
2608 if not name_value
and not strict_parsing
:
2610 nv
= name_value
.split('=', 1)
2613 raise ValueError('bad query field: %r' % (name_value
,))
2614 # Handle case of a control-name with no equal sign
2615 if keep_blank_values
:
2619 if len(nv
[1]) or keep_blank_values
:
2620 name
= nv
[0].replace('+', ' ')
2621 name
= compat_urllib_parse_unquote(
2622 name
, encoding
=encoding
, errors
=errors
)
2623 name
= _coerce_result(name
)
2624 value
= nv
[1].replace('+', ' ')
2625 value
= compat_urllib_parse_unquote(
2626 value
, encoding
=encoding
, errors
=errors
)
2627 value
= _coerce_result(value
)
2628 r
.append((name
, value
))
2631 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
2632 encoding
='utf-8', errors
='replace'):
2634 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
2635 encoding
=encoding
, errors
=errors
)
2636 for name
, value
in pairs
:
2637 if name
in parsed_result
:
2638 parsed_result
[name
].append(value
)
2640 parsed_result
[name
] = [value
]
2641 return parsed_result
2644 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2647 if compat_os_name
== 'nt':
def compat_shlex_quote(s):
    """Quote *s* for use as a single command-line argument on Windows.

    A string consisting solely of word characters, '-', '_', '.' and '/' is
    returned unchanged. Anything else, including the empty string, is wrapped
    in double quotes with embedded double quotes escaped by a backslash.
    """
    # Anchor with \Z instead of $: '$' also matches just before a trailing
    # newline, which used to let a value such as 'abc<newline>' through
    # without any quoting.
    if re.match(r'^[-_\w./]+\Z', s):
        return s
    return '"%s"' % s.replace('"', '\\"')
2652 from shlex
import quote
as compat_shlex_quote
2653 except ImportError: # Python < 3.3
2654 def compat_shlex_quote(s
):
2655 if re
.match(r
'^[-_\w./]+$', s
):
2658 return "'" + s
.replace("'", "'\"'\"'") + "'"
2662 args
= shlex
.split('äøę')
2663 assert (isinstance(args
, list)
2664 and isinstance(args
[0], compat_str
)
2665 and args
[0] == 'äøę')
2666 compat_shlex_split
= shlex
.split
2667 except (AssertionError, UnicodeEncodeError):
2668 # Working around shlex issue with unicode strings on some python 2
2669 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split() and return the tokens as text.

    Fallback for interpreters whose shlex mishandles unicode strings
    (http://bugs.python.org/issue1548891): text input is encoded to UTF-8
    before splitting and every resulting token is decoded back from UTF-8.
    """
    raw = s.encode('utf-8') if isinstance(s, compat_str) else s
    return [token.decode('utf-8') for token in shlex.split(raw, comments, posix)]
2683 if sys
.version_info
>= (3, 0):
2684 compat_getenv
= os
.getenv
2685 compat_expanduser
= os
.path
.expanduser
2687 def compat_setenv(key
, value
, env
=os
.environ
):
2690 # Environment variables should be decoded with filesystem encoding.
2691 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2693 def compat_getenv(key
, default
=None):
2694 from .utils
import get_filesystem_encoding
2695 env
= os
.getenv(key
, default
)
2697 env
= env
.decode(get_filesystem_encoding())
2700 def compat_setenv(key
, value
, env
=os
.environ
):
2702 from .utils
import get_filesystem_encoding
2703 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2704 env
[encode(key
)] = encode(value
)
2706 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2707 # environment variables with filesystem encoding. We will work around this by
2708 # providing adjusted implementations.
2709 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2710 # for different platforms with correct environment variables decoding.
2712 if compat_os_name
== 'posix':
2713 def compat_expanduser(path
):
2714 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2716 if not path
.startswith('~'):
2718 i
= path
.find('/', 1)
2722 if 'HOME' not in os
.environ
:
2724 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2726 userhome
= compat_getenv('HOME')
2730 pwent
= pwd
.getpwnam(path
[1:i
])
2733 userhome
= pwent
.pw_dir
2734 userhome
= userhome
.rstrip('/')
2735 return (userhome
+ path
[i
:]) or '/'
2736 elif compat_os_name
in ('nt', 'ce'):
2737 def compat_expanduser(path
):
2738 """Expand ~ and ~user constructs.
2740 If user or $HOME is unknown, do nothing."""
2744 while i
< n
and path
[i
] not in '/\\':
2747 if 'HOME' in os
.environ
:
2748 userhome
= compat_getenv('HOME')
2749 elif 'USERPROFILE' in os
.environ
:
2750 userhome
= compat_getenv('USERPROFILE')
2751 elif 'HOMEPATH' not in os
.environ
:
2755 drive
= compat_getenv('HOMEDRIVE')
2758 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2761 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2763 return userhome
+ path
[i
:]
2765 compat_expanduser
= os
.path
.expanduser
2768 if compat_os_name
== 'nt' and sys
.version_info
< (3, 8):
2769 # os.path.realpath on Windows does not follow symbolic links
2770 # prior to Python 3.8 (see https://bugs.python.org/issue9949)
2771 def compat_realpath(path
):
2772 while os
.path
.islink(path
):
2773 path
= os
.path
.abspath(os
.readlink(path
))
2776 compat_realpath
= os
.path
.realpath
2779 if sys
.version_info
< (3, 0):
def compat_print(s):
    """Print *s* encoded with the preferred encoding (Python 2 variant).

    Characters the encoding cannot represent are written as XML character
    references instead of raising.
    """
    from .utils import preferredencoding
    encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
    print(encoded)
2784 def compat_print(s
):
2785 assert isinstance(s
, compat_str
)
2789 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass(), encoding a text *prompt* first.

    Variant selected for Python 2 on win32: a compat_str prompt is encoded
    with the preferred encoding before being handed to getpass; any other
    prompt is passed through untouched. Extra arguments are forwarded as-is.
    """
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    from .utils import preferredencoding
    encoded_prompt = prompt.encode(preferredencoding())
    return getpass.getpass(encoded_prompt, *args, **kwargs)
2796 compat_getpass
= getpass
.getpass
2799 compat_input
= raw_input
2800 except NameError: # Python 3
2801 compat_input
= input
2803 # Python < 2.6.5 requires kwargs to be bytes
2807 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a new dict equal to *kwargs* with every key passed through bytes().

    Values are kept untouched.
    """
    converted = {}
    for key, value in kwargs.items():
        converted[bytes(key)] = value
    return converted
2812 compat_kwargs
= lambda kwargs
: kwargs
2816 compat_numeric_types
= (int, float, long, complex)
2817 except NameError: # Python 3
2818 compat_numeric_types
= (int, float, complex)
2822 compat_integer_types
= (int, long)
2823 except NameError: # Python 3
2824 compat_integer_types
= (int, )
2827 if sys
.version_info
< (2, 7):
2828 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2829 host
, port
= address
2831 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2832 af
, socktype
, proto
, canonname
, sa
= res
2835 sock
= socket
.socket(af
, socktype
, proto
)
2836 sock
.settimeout(timeout
)
2838 sock
.bind(source_address
)
2841 except socket
.error
as _
:
2843 if sock
is not None:
2848 raise socket
.error('getaddrinfo returns an empty list')
2850 compat_socket_create_connection
= socket
.create_connection
2853 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
2854 # See http://bugs.python.org/issue9161 for what is broken
2855 def workaround_optparse_bug9161():
2856 op
= optparse
.OptionParser()
2857 og
= optparse
.OptionGroup(op
, 'foo')
2861 real_add_option
= optparse
.OptionGroup
.add_option
2863 def _compat_add_option(self
, *args
, **kwargs
):
2865 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2867 bargs
= [enc(a
) for a
in args
]
2869 (k
, enc(v
)) for k
, v
in kwargs
.items())
2870 return real_add_option(self
, *bargs
, **bkwargs
)
2871 optparse
.OptionGroup
.add_option
= _compat_add_option
2874 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2875 compat_get_terminal_size
= shutil
.get_terminal_size
2877 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2879 def compat_get_terminal_size(fallback
=(80, 24)):
2880 columns
= compat_getenv('COLUMNS')
2882 columns
= int(columns
)
2885 lines
= compat_getenv('LINES')
2891 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2893 sp
= subprocess
.Popen(
2895 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2896 out
, err
= sp
.communicate()
2897 _lines
, _columns
= map(int, out
.split())
2899 _columns
, _lines
= _terminal_size(*fallback
)
2901 if columns
is None or columns
<= 0:
2903 if lines
is None or lines
<= 0:
2905 return _terminal_size(columns
, lines
)
2908 itertools
.count(start
=0, step
=1)
2909 compat_itertools_count
= itertools
.count
2910 except TypeError: # Python 2.6
2911 def compat_itertools_count(start
=0, step
=1):
2917 if sys
.version_info
>= (3, 0):
2918 from tokenize
import tokenize
as compat_tokenize_tokenize
2920 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2924 struct
.pack('!I', 0)
2926 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2927 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """Pack *args* according to the format *spec* via struct.pack().

    A text format is encoded to ASCII bytes first, because the affected
    interpreters only accept byte-string formats
    (https://bugs.python.org/issue19099).
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def compat_struct_unpack(spec, *args):
    """Unpack data according to the format *spec* via struct.unpack().

    A text format is encoded to ASCII bytes first, because the affected
    interpreters only accept byte-string formats
    (https://bugs.python.org/issue19099). Remaining arguments are forwarded.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(fmt, *args)
class compat_Struct(struct.Struct):
    """struct.Struct whose format may be given as text.

    A text format is encoded to ASCII bytes before the base class is
    initialised, because the affected interpreters only accept byte-string
    formats (https://bugs.python.org/issue19099).
    """

    def __init__(self, fmt):
        spec = fmt.encode('ascii') if isinstance(fmt, compat_str) else fmt
        super(compat_Struct, self).__init__(spec)
2944 compat_struct_pack
= struct
.pack
2945 compat_struct_unpack
= struct
.unpack
2946 if platform
.python_implementation() == 'IronPython' and sys
.version_info
< (2, 7, 8):
class compat_Struct(struct.Struct):
    """struct.Struct whose unpack() always hands a buffer to the base class.

    Variant selected for IronPython older than 2.7.8.
    """

    def unpack(self, string):
        # Wrap anything that is not already a buffer object.
        data = string if isinstance(string, buffer) else buffer(string)  # noqa: F821
        return super(compat_Struct, self).unpack(data)
2953 compat_Struct
= struct
.Struct
2957 from future_builtins
import zip as compat_zip
2958 except ImportError: # not 2.6+ or is 3.x
2960 from itertools
import izip
as compat_zip
# < 2.5 or 3.x
2965 if sys
.version_info
< (3, 3):
def compat_b64decode(s, *args, **kwargs):
    """Decode base64 data with base64.b64decode(), accepting text input.

    Variant selected for interpreters older than 3.3: a text *s* is encoded
    to ASCII bytes first; any other input and all extra arguments are passed
    through unchanged.
    """
    payload = s.encode('ascii') if isinstance(s, compat_str) else s
    return base64.b64decode(payload, *args, **kwargs)
2971 compat_b64decode
= base64
.b64decode
2974 if platform
.python_implementation() == 'PyPy' and sys
.pypy_version_info
< (5, 4, 0):
2975 # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
2976 # names, see the original PyPy issue [1] and the youtube-dl one [2].
2977 # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
2978 # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
2979 def compat_ctypes_WINFUNCTYPE(*args
, **kwargs
):
2980 real
= ctypes
.WINFUNCTYPE(*args
, **kwargs
)
2982 def resf(tpl
, *args
, **kwargs
):
2984 return real((str(funcname
), dll
), *args
, **kwargs
)
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Forward all arguments to ctypes.WINFUNCTYPE and return its result."""
    make_prototype = ctypes.WINFUNCTYPE
    return make_prototype(*args, **kwargs)
2993 'compat_HTMLParseError',
2994 'compat_HTMLParser',
2998 'compat_basestring',
3001 'compat_cookiejar_Cookie',
3003 'compat_ctypes_WINFUNCTYPE',
3004 'compat_etree_Element',
3005 'compat_etree_fromstring',
3006 'compat_etree_register_namespace',
3007 'compat_expanduser',
3008 'compat_get_terminal_size',
3011 'compat_html_entities',
3012 'compat_html_entities_html5',
3013 'compat_http_client',
3014 'compat_http_server',
3016 'compat_integer_types',
3017 'compat_itertools_count',
3019 'compat_numeric_types',
3026 'compat_shlex_quote',
3027 'compat_shlex_split',
3028 'compat_socket_create_connection',
3030 'compat_struct_pack',
3031 'compat_struct_unpack',
3032 'compat_subprocess_get_DEVNULL',
3033 'compat_tokenize_tokenize',
3034 'compat_urllib_error',
3035 'compat_urllib_parse',
3036 'compat_urllib_parse_unquote',
3037 'compat_urllib_parse_unquote_plus',
3038 'compat_urllib_parse_unquote_to_bytes',
3039 'compat_urllib_parse_urlencode',
3040 'compat_urllib_parse_urlparse',
3041 'compat_urllib_request',
3042 'compat_urllib_request_DataHandler',
3043 'compat_urllib_response',
3045 'compat_urlretrieve',
3046 'compat_xml_parse_error',
3049 'workaround_optparse_bug9161',