2 from __future__
import unicode_literals
19 import xml
.etree
.ElementTree
23 import urllib
.request
as compat_urllib_request
24 except ImportError: # Python 2
25 import urllib2
as compat_urllib_request
28 import urllib
.error
as compat_urllib_error
29 except ImportError: # Python 2
30 import urllib2
as compat_urllib_error
33 import urllib
.parse
as compat_urllib_parse
34 except ImportError: # Python 2
35 import urllib
as compat_urllib_parse
38 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
39 except ImportError: # Python 2
40 from urlparse
import urlparse
as compat_urllib_parse_urlparse
43 import urllib
.parse
as compat_urlparse
44 except ImportError: # Python 2
45 import urlparse
as compat_urlparse
48 import urllib
.response
as compat_urllib_response
49 except ImportError: # Python 2
50 import urllib
as compat_urllib_response
53 import http
.cookiejar
as compat_cookiejar
54 except ImportError: # Python 2
55 import cookielib
as compat_cookiejar
58 import http
.cookies
as compat_cookies
59 except ImportError: # Python 2
60 import Cookie
as compat_cookies
63 import html
.entities
as compat_html_entities
64 except ImportError: # Python 2
65 import htmlentitydefs
as compat_html_entities
68 compat_html_entities_html5
= compat_html_entities
.html5
69 except AttributeError:
70 # Copied from CPython 3.5.1 html/entities.py
71 compat_html_entities_html5
= {
80 'acE;': '\u223e\u0333',
100 'alefsym;': '\u2135',
115 'andslope;': '\u2a58',
121 'angmsdaa;': '\u29a8',
122 'angmsdab;': '\u29a9',
123 'angmsdac;': '\u29aa',
124 'angmsdad;': '\u29ab',
125 'angmsdae;': '\u29ac',
126 'angmsdaf;': '\u29ad',
127 'angmsdag;': '\u29ae',
128 'angmsdah;': '\u29af',
130 'angrtvb;': '\u22be',
131 'angrtvbd;': '\u299d',
134 'angzarr;': '\u237c',
137 'Aopf;': '\U0001d538',
138 'aopf;': '\U0001d552',
145 'ApplyFunction;': '\u2061',
147 'approxeq;': '\u224a',
152 'Ascr;': '\U0001d49c',
153 'ascr;': '\U0001d4b6',
157 'asympeq;': '\u224d',
166 'awconint;': '\u2233',
168 'backcong;': '\u224c',
169 'backepsilon;': '\u03f6',
170 'backprime;': '\u2035',
171 'backsim;': '\u223d',
172 'backsimeq;': '\u22cd',
173 'Backslash;': '\u2216',
178 'barwedge;': '\u2305',
180 'bbrktbrk;': '\u23b6',
186 'Because;': '\u2235',
187 'because;': '\u2235',
188 'bemptyv;': '\u29b0',
191 'Bernoullis;': '\u212c',
195 'between;': '\u226c',
196 'Bfr;': '\U0001d505',
197 'bfr;': '\U0001d51f',
199 'bigcirc;': '\u25ef',
201 'bigodot;': '\u2a00',
202 'bigoplus;': '\u2a01',
203 'bigotimes;': '\u2a02',
204 'bigsqcup;': '\u2a06',
205 'bigstar;': '\u2605',
206 'bigtriangledown;': '\u25bd',
207 'bigtriangleup;': '\u25b3',
208 'biguplus;': '\u2a04',
210 'bigwedge;': '\u22c0',
212 'blacklozenge;': '\u29eb',
213 'blacksquare;': '\u25aa',
214 'blacktriangle;': '\u25b4',
215 'blacktriangledown;': '\u25be',
216 'blacktriangleleft;': '\u25c2',
217 'blacktriangleright;': '\u25b8',
224 'bnequiv;': '\u2261\u20e5',
227 'Bopf;': '\U0001d539',
228 'bopf;': '\U0001d553',
251 'boxminus;': '\u229f',
252 'boxplus;': '\u229e',
253 'boxtimes;': '\u22a0',
282 'bscr;': '\U0001d4b7',
288 'bsolhsub;': '\u27c8',
301 'capbrcup;': '\u2a49',
305 'CapitalDifferentialD;': '\u2145',
306 'caps;': '\u2229\ufe00',
309 'Cayleys;': '\u212d',
319 'Cconint;': '\u2230',
321 'ccupssm;': '\u2a50',
327 'cemptyv;': '\u29b2',
330 'CenterDot;': '\xb7',
331 'centerdot;': '\xb7',
333 'cfr;': '\U0001d520',
337 'checkmark;': '\u2713',
343 'circlearrowleft;': '\u21ba',
344 'circlearrowright;': '\u21bb',
345 'circledast;': '\u229b',
346 'circledcirc;': '\u229a',
347 'circleddash;': '\u229d',
348 'CircleDot;': '\u2299',
350 'circledS;': '\u24c8',
351 'CircleMinus;': '\u2296',
352 'CirclePlus;': '\u2295',
353 'CircleTimes;': '\u2297',
356 'cirfnint;': '\u2a10',
358 'cirscir;': '\u29c2',
359 'ClockwiseContourIntegral;': '\u2232',
360 'CloseCurlyDoubleQuote;': '\u201d',
361 'CloseCurlyQuote;': '\u2019',
363 'clubsuit;': '\u2663',
368 'coloneq;': '\u2254',
373 'complement;': '\u2201',
374 'complexes;': '\u2102',
376 'congdot;': '\u2a6d',
377 'Congruent;': '\u2261',
380 'ContourIntegral;': '\u222e',
382 'copf;': '\U0001d554',
384 'Coproduct;': '\u2210',
390 'CounterClockwiseContourIntegral;': '\u2233',
394 'Cscr;': '\U0001d49e',
395 'cscr;': '\U0001d4b8',
401 'cudarrl;': '\u2938',
402 'cudarrr;': '\u2935',
406 'cularrp;': '\u293d',
409 'cupbrcap;': '\u2a48',
415 'cups;': '\u222a\ufe00',
417 'curarrm;': '\u293c',
418 'curlyeqprec;': '\u22de',
419 'curlyeqsucc;': '\u22df',
420 'curlyvee;': '\u22ce',
421 'curlywedge;': '\u22cf',
424 'curvearrowleft;': '\u21b6',
425 'curvearrowright;': '\u21b7',
428 'cwconint;': '\u2232',
440 'dbkarow;': '\u290f',
448 'ddagger;': '\u2021',
450 'DDotrahd;': '\u2911',
451 'ddotseq;': '\u2a77',
457 'demptyv;': '\u29b1',
459 'Dfr;': '\U0001d507',
460 'dfr;': '\U0001d521',
464 'DiacriticalAcute;': '\xb4',
465 'DiacriticalDot;': '\u02d9',
466 'DiacriticalDoubleAcute;': '\u02dd',
467 'DiacriticalGrave;': '`',
468 'DiacriticalTilde;': '\u02dc',
470 'Diamond;': '\u22c4',
471 'diamond;': '\u22c4',
472 'diamondsuit;': '\u2666',
475 'DifferentialD;': '\u2146',
476 'digamma;': '\u03dd',
481 'divideontimes;': '\u22c7',
488 'Dopf;': '\U0001d53b',
489 'dopf;': '\U0001d555',
494 'doteqdot;': '\u2251',
495 'DotEqual;': '\u2250',
496 'dotminus;': '\u2238',
497 'dotplus;': '\u2214',
498 'dotsquare;': '\u22a1',
499 'doublebarwedge;': '\u2306',
500 'DoubleContourIntegral;': '\u222f',
501 'DoubleDot;': '\xa8',
502 'DoubleDownArrow;': '\u21d3',
503 'DoubleLeftArrow;': '\u21d0',
504 'DoubleLeftRightArrow;': '\u21d4',
505 'DoubleLeftTee;': '\u2ae4',
506 'DoubleLongLeftArrow;': '\u27f8',
507 'DoubleLongLeftRightArrow;': '\u27fa',
508 'DoubleLongRightArrow;': '\u27f9',
509 'DoubleRightArrow;': '\u21d2',
510 'DoubleRightTee;': '\u22a8',
511 'DoubleUpArrow;': '\u21d1',
512 'DoubleUpDownArrow;': '\u21d5',
513 'DoubleVerticalBar;': '\u2225',
514 'DownArrow;': '\u2193',
515 'Downarrow;': '\u21d3',
516 'downarrow;': '\u2193',
517 'DownArrowBar;': '\u2913',
518 'DownArrowUpArrow;': '\u21f5',
519 'DownBreve;': '\u0311',
520 'downdownarrows;': '\u21ca',
521 'downharpoonleft;': '\u21c3',
522 'downharpoonright;': '\u21c2',
523 'DownLeftRightVector;': '\u2950',
524 'DownLeftTeeVector;': '\u295e',
525 'DownLeftVector;': '\u21bd',
526 'DownLeftVectorBar;': '\u2956',
527 'DownRightTeeVector;': '\u295f',
528 'DownRightVector;': '\u21c1',
529 'DownRightVectorBar;': '\u2957',
530 'DownTee;': '\u22a4',
531 'DownTeeArrow;': '\u21a7',
532 'drbkarow;': '\u2910',
535 'Dscr;': '\U0001d49f',
536 'dscr;': '\U0001d4b9',
547 'dwangle;': '\u29a6',
550 'dzigrarr;': '\u27ff',
572 'Efr;': '\U0001d508',
573 'efr;': '\U0001d522',
582 'Element;': '\u2208',
583 'elinters;': '\u23e7',
590 'emptyset;': '\u2205',
591 'EmptySmallSquare;': '\u25fb',
593 'EmptyVerySmallSquare;': '\u25ab',
602 'Eopf;': '\U0001d53c',
603 'eopf;': '\U0001d556',
608 'Epsilon;': '\u0395',
609 'epsilon;': '\u03b5',
612 'eqcolon;': '\u2255',
614 'eqslantgtr;': '\u2a96',
615 'eqslantless;': '\u2a95',
618 'EqualTilde;': '\u2242',
620 'Equilibrium;': '\u21cc',
622 'equivDD;': '\u2a78',
623 'eqvparsl;': '\u29e5',
645 'expectation;': '\u2130',
646 'ExponentialE;': '\u2147',
647 'exponentiale;': '\u2147',
648 'fallingdotseq;': '\u2252',
655 'Ffr;': '\U0001d509',
656 'ffr;': '\U0001d523',
658 'FilledSmallSquare;': '\u25fc',
659 'FilledVerySmallSquare;': '\u25aa',
665 'Fopf;': '\U0001d53d',
666 'fopf;': '\U0001d557',
671 'Fouriertrf;': '\u2131',
672 'fpartint;': '\u2a0d',
694 'fscr;': '\U0001d4bb',
716 'geqslant;': '\u2a7e',
720 'gesdoto;': '\u2a82',
721 'gesdotol;': '\u2a84',
722 'gesl;': '\u22db\ufe00',
724 'Gfr;': '\U0001d50a',
725 'gfr;': '\U0001d524',
737 'gnapprox;': '\u2a8a',
743 'Gopf;': '\U0001d53e',
744 'gopf;': '\U0001d558',
746 'GreaterEqual;': '\u2265',
747 'GreaterEqualLess;': '\u22db',
748 'GreaterFullEqual;': '\u2267',
749 'GreaterGreater;': '\u2aa2',
750 'GreaterLess;': '\u2277',
751 'GreaterSlantEqual;': '\u2a7e',
752 'GreaterTilde;': '\u2273',
753 'Gscr;': '\U0001d4a2',
767 'gtquest;': '\u2a7c',
768 'gtrapprox;': '\u2a86',
771 'gtreqless;': '\u22db',
772 'gtreqqless;': '\u2a8c',
773 'gtrless;': '\u2277',
775 'gvertneqq;': '\u2269\ufe00',
776 'gvnE;': '\u2269\ufe00',
785 'harrcir;': '\u2948',
792 'heartsuit;': '\u2665',
796 'hfr;': '\U0001d525',
797 'HilbertSpace;': '\u210b',
798 'hksearow;': '\u2925',
799 'hkswarow;': '\u2926',
802 'hookleftarrow;': '\u21a9',
803 'hookrightarrow;': '\u21aa',
805 'hopf;': '\U0001d559',
807 'HorizontalLine;': '\u2500',
809 'hscr;': '\U0001d4bd',
813 'HumpDownHump;': '\u224e',
814 'HumpEqual;': '\u224f',
835 'ifr;': '\U0001d526',
851 'ImaginaryI;': '\u2148',
852 'imagline;': '\u2110',
853 'imagpart;': '\u2111',
857 'Implies;': '\u21d2',
861 'infintie;': '\u29dd',
866 'integers;': '\u2124',
867 'Integral;': '\u222b',
868 'intercal;': '\u22ba',
869 'Intersection;': '\u22c2',
870 'intlarhk;': '\u2a17',
871 'intprod;': '\u2a3c',
872 'InvisibleComma;': '\u2063',
873 'InvisibleTimes;': '\u2062',
878 'Iopf;': '\U0001d540',
879 'iopf;': '\U0001d55a',
886 'iscr;': '\U0001d4be',
888 'isindot;': '\u22f5',
906 'Jfr;': '\U0001d50d',
907 'jfr;': '\U0001d527',
909 'Jopf;': '\U0001d541',
910 'jopf;': '\U0001d55b',
911 'Jscr;': '\U0001d4a5',
912 'jscr;': '\U0001d4bf',
924 'Kfr;': '\U0001d50e',
925 'kfr;': '\U0001d528',
931 'Kopf;': '\U0001d542',
932 'kopf;': '\U0001d55c',
933 'Kscr;': '\U0001d4a6',
934 'kscr;': '\U0001d4c0',
938 'laemptyv;': '\u29b4',
947 'Laplacetrf;': '\u2112',
954 'larrbfs;': '\u291f',
959 'larrsim;': '\u2973',
965 'lates;': '\u2aad\ufe00',
972 'lbrksld;': '\u298f',
973 'lbrkslu;': '\u298d',
985 'ldrdhar;': '\u2967',
986 'ldrushar;': '\u294b',
990 'LeftAngleBracket;': '\u27e8',
991 'LeftArrow;': '\u2190',
992 'Leftarrow;': '\u21d0',
993 'leftarrow;': '\u2190',
994 'LeftArrowBar;': '\u21e4',
995 'LeftArrowRightArrow;': '\u21c6',
996 'leftarrowtail;': '\u21a2',
997 'LeftCeiling;': '\u2308',
998 'LeftDoubleBracket;': '\u27e6',
999 'LeftDownTeeVector;': '\u2961',
1000 'LeftDownVector;': '\u21c3',
1001 'LeftDownVectorBar;': '\u2959',
1002 'LeftFloor;': '\u230a',
1003 'leftharpoondown;': '\u21bd',
1004 'leftharpoonup;': '\u21bc',
1005 'leftleftarrows;': '\u21c7',
1006 'LeftRightArrow;': '\u2194',
1007 'Leftrightarrow;': '\u21d4',
1008 'leftrightarrow;': '\u2194',
1009 'leftrightarrows;': '\u21c6',
1010 'leftrightharpoons;': '\u21cb',
1011 'leftrightsquigarrow;': '\u21ad',
1012 'LeftRightVector;': '\u294e',
1013 'LeftTee;': '\u22a3',
1014 'LeftTeeArrow;': '\u21a4',
1015 'LeftTeeVector;': '\u295a',
1016 'leftthreetimes;': '\u22cb',
1017 'LeftTriangle;': '\u22b2',
1018 'LeftTriangleBar;': '\u29cf',
1019 'LeftTriangleEqual;': '\u22b4',
1020 'LeftUpDownVector;': '\u2951',
1021 'LeftUpTeeVector;': '\u2960',
1022 'LeftUpVector;': '\u21bf',
1023 'LeftUpVectorBar;': '\u2958',
1024 'LeftVector;': '\u21bc',
1025 'LeftVectorBar;': '\u2952',
1030 'leqslant;': '\u2a7d',
1033 'lesdot;': '\u2a7f',
1034 'lesdoto;': '\u2a81',
1035 'lesdotor;': '\u2a83',
1036 'lesg;': '\u22da\ufe00',
1037 'lesges;': '\u2a93',
1038 'lessapprox;': '\u2a85',
1039 'lessdot;': '\u22d6',
1040 'lesseqgtr;': '\u22da',
1041 'lesseqqgtr;': '\u2a8b',
1042 'LessEqualGreater;': '\u22da',
1043 'LessFullEqual;': '\u2266',
1044 'LessGreater;': '\u2276',
1045 'lessgtr;': '\u2276',
1046 'LessLess;': '\u2aa1',
1047 'lesssim;': '\u2272',
1048 'LessSlantEqual;': '\u2a7d',
1049 'LessTilde;': '\u2272',
1050 'lfisht;': '\u297c',
1051 'lfloor;': '\u230a',
1052 'Lfr;': '\U0001d50f',
1053 'lfr;': '\U0001d529',
1059 'lharul;': '\u296a',
1066 'llcorner;': '\u231e',
1067 'Lleftarrow;': '\u21da',
1068 'llhard;': '\u296b',
1070 'Lmidot;': '\u013f',
1071 'lmidot;': '\u0140',
1072 'lmoust;': '\u23b0',
1073 'lmoustache;': '\u23b0',
1075 'lnapprox;': '\u2a89',
1084 'LongLeftArrow;': '\u27f5',
1085 'Longleftarrow;': '\u27f8',
1086 'longleftarrow;': '\u27f5',
1087 'LongLeftRightArrow;': '\u27f7',
1088 'Longleftrightarrow;': '\u27fa',
1089 'longleftrightarrow;': '\u27f7',
1090 'longmapsto;': '\u27fc',
1091 'LongRightArrow;': '\u27f6',
1092 'Longrightarrow;': '\u27f9',
1093 'longrightarrow;': '\u27f6',
1094 'looparrowleft;': '\u21ab',
1095 'looparrowright;': '\u21ac',
1097 'Lopf;': '\U0001d543',
1098 'lopf;': '\U0001d55d',
1099 'loplus;': '\u2a2d',
1100 'lotimes;': '\u2a34',
1101 'lowast;': '\u2217',
1103 'LowerLeftArrow;': '\u2199',
1104 'LowerRightArrow;': '\u2198',
1106 'lozenge;': '\u25ca',
1109 'lparlt;': '\u2993',
1111 'lrcorner;': '\u231f',
1113 'lrhard;': '\u296d',
1116 'lsaquo;': '\u2039',
1118 'lscr;': '\U0001d4c1',
1126 'lsquor;': '\u201a',
1127 'Lstrok;': '\u0141',
1128 'lstrok;': '\u0142',
1137 'lthree;': '\u22cb',
1138 'ltimes;': '\u22c9',
1139 'ltlarr;': '\u2976',
1140 'ltquest;': '\u2a7b',
1144 'ltrPar;': '\u2996',
1145 'lurdshar;': '\u294a',
1146 'luruhar;': '\u2966',
1147 'lvertneqq;': '\u2268\ufe00',
1148 'lvnE;': '\u2268\ufe00',
1153 'maltese;': '\u2720',
1156 'mapsto;': '\u21a6',
1157 'mapstodown;': '\u21a7',
1158 'mapstoleft;': '\u21a4',
1159 'mapstoup;': '\u21a5',
1160 'marker;': '\u25ae',
1161 'mcomma;': '\u2a29',
1166 'measuredangle;': '\u2221',
1167 'MediumSpace;': '\u205f',
1168 'Mellintrf;': '\u2133',
1169 'Mfr;': '\U0001d510',
1170 'mfr;': '\U0001d52a',
1176 'midcir;': '\u2af0',
1180 'minusb;': '\u229f',
1181 'minusd;': '\u2238',
1182 'minusdu;': '\u2a2a',
1183 'MinusPlus;': '\u2213',
1186 'mnplus;': '\u2213',
1187 'models;': '\u22a7',
1188 'Mopf;': '\U0001d544',
1189 'mopf;': '\U0001d55e',
1192 'mscr;': '\U0001d4c2',
1193 'mstpos;': '\u223e',
1196 'multimap;': '\u22b8',
1199 'Nacute;': '\u0143',
1200 'nacute;': '\u0144',
1201 'nang;': '\u2220\u20d2',
1203 'napE;': '\u2a70\u0338',
1204 'napid;': '\u224b\u0338',
1206 'napprox;': '\u2249',
1208 'natural;': '\u266e',
1209 'naturals;': '\u2115',
1212 'nbump;': '\u224e\u0338',
1213 'nbumpe;': '\u224f\u0338',
1215 'Ncaron;': '\u0147',
1216 'ncaron;': '\u0148',
1217 'Ncedil;': '\u0145',
1218 'ncedil;': '\u0146',
1220 'ncongdot;': '\u2a6d\u0338',
1226 'nearhk;': '\u2924',
1229 'nearrow;': '\u2197',
1230 'nedot;': '\u2250\u0338',
1231 'NegativeMediumSpace;': '\u200b',
1232 'NegativeThickSpace;': '\u200b',
1233 'NegativeThinSpace;': '\u200b',
1234 'NegativeVeryThinSpace;': '\u200b',
1235 'nequiv;': '\u2262',
1236 'nesear;': '\u2928',
1237 'nesim;': '\u2242\u0338',
1238 'NestedGreaterGreater;': '\u226b',
1239 'NestedLessLess;': '\u226a',
1241 'nexist;': '\u2204',
1242 'nexists;': '\u2204',
1243 'Nfr;': '\U0001d511',
1244 'nfr;': '\U0001d52b',
1245 'ngE;': '\u2267\u0338',
1248 'ngeqq;': '\u2267\u0338',
1249 'ngeqslant;': '\u2a7e\u0338',
1250 'nges;': '\u2a7e\u0338',
1251 'nGg;': '\u22d9\u0338',
1253 'nGt;': '\u226b\u20d2',
1256 'nGtv;': '\u226b\u0338',
1269 'nlE;': '\u2266\u0338',
1271 'nLeftarrow;': '\u21cd',
1272 'nleftarrow;': '\u219a',
1273 'nLeftrightarrow;': '\u21ce',
1274 'nleftrightarrow;': '\u21ae',
1276 'nleqq;': '\u2266\u0338',
1277 'nleqslant;': '\u2a7d\u0338',
1278 'nles;': '\u2a7d\u0338',
1280 'nLl;': '\u22d8\u0338',
1282 'nLt;': '\u226a\u20d2',
1285 'nltrie;': '\u22ec',
1286 'nLtv;': '\u226a\u0338',
1288 'NoBreak;': '\u2060',
1289 'NonBreakingSpace;': '\xa0',
1291 'nopf;': '\U0001d55f',
1295 'NotCongruent;': '\u2262',
1296 'NotCupCap;': '\u226d',
1297 'NotDoubleVerticalBar;': '\u2226',
1298 'NotElement;': '\u2209',
1299 'NotEqual;': '\u2260',
1300 'NotEqualTilde;': '\u2242\u0338',
1301 'NotExists;': '\u2204',
1302 'NotGreater;': '\u226f',
1303 'NotGreaterEqual;': '\u2271',
1304 'NotGreaterFullEqual;': '\u2267\u0338',
1305 'NotGreaterGreater;': '\u226b\u0338',
1306 'NotGreaterLess;': '\u2279',
1307 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1308 'NotGreaterTilde;': '\u2275',
1309 'NotHumpDownHump;': '\u224e\u0338',
1310 'NotHumpEqual;': '\u224f\u0338',
1312 'notindot;': '\u22f5\u0338',
1313 'notinE;': '\u22f9\u0338',
1314 'notinva;': '\u2209',
1315 'notinvb;': '\u22f7',
1316 'notinvc;': '\u22f6',
1317 'NotLeftTriangle;': '\u22ea',
1318 'NotLeftTriangleBar;': '\u29cf\u0338',
1319 'NotLeftTriangleEqual;': '\u22ec',
1320 'NotLess;': '\u226e',
1321 'NotLessEqual;': '\u2270',
1322 'NotLessGreater;': '\u2278',
1323 'NotLessLess;': '\u226a\u0338',
1324 'NotLessSlantEqual;': '\u2a7d\u0338',
1325 'NotLessTilde;': '\u2274',
1326 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1327 'NotNestedLessLess;': '\u2aa1\u0338',
1329 'notniva;': '\u220c',
1330 'notnivb;': '\u22fe',
1331 'notnivc;': '\u22fd',
1332 'NotPrecedes;': '\u2280',
1333 'NotPrecedesEqual;': '\u2aaf\u0338',
1334 'NotPrecedesSlantEqual;': '\u22e0',
1335 'NotReverseElement;': '\u220c',
1336 'NotRightTriangle;': '\u22eb',
1337 'NotRightTriangleBar;': '\u29d0\u0338',
1338 'NotRightTriangleEqual;': '\u22ed',
1339 'NotSquareSubset;': '\u228f\u0338',
1340 'NotSquareSubsetEqual;': '\u22e2',
1341 'NotSquareSuperset;': '\u2290\u0338',
1342 'NotSquareSupersetEqual;': '\u22e3',
1343 'NotSubset;': '\u2282\u20d2',
1344 'NotSubsetEqual;': '\u2288',
1345 'NotSucceeds;': '\u2281',
1346 'NotSucceedsEqual;': '\u2ab0\u0338',
1347 'NotSucceedsSlantEqual;': '\u22e1',
1348 'NotSucceedsTilde;': '\u227f\u0338',
1349 'NotSuperset;': '\u2283\u20d2',
1350 'NotSupersetEqual;': '\u2289',
1351 'NotTilde;': '\u2241',
1352 'NotTildeEqual;': '\u2244',
1353 'NotTildeFullEqual;': '\u2247',
1354 'NotTildeTilde;': '\u2249',
1355 'NotVerticalBar;': '\u2224',
1357 'nparallel;': '\u2226',
1358 'nparsl;': '\u2afd\u20e5',
1359 'npart;': '\u2202\u0338',
1360 'npolint;': '\u2a14',
1362 'nprcue;': '\u22e0',
1363 'npre;': '\u2aaf\u0338',
1365 'npreceq;': '\u2aaf\u0338',
1368 'nrarrc;': '\u2933\u0338',
1369 'nrarrw;': '\u219d\u0338',
1370 'nRightarrow;': '\u21cf',
1371 'nrightarrow;': '\u219b',
1373 'nrtrie;': '\u22ed',
1375 'nsccue;': '\u22e1',
1376 'nsce;': '\u2ab0\u0338',
1377 'Nscr;': '\U0001d4a9',
1378 'nscr;': '\U0001d4c3',
1379 'nshortmid;': '\u2224',
1380 'nshortparallel;': '\u2226',
1383 'nsimeq;': '\u2244',
1386 'nsqsube;': '\u22e2',
1387 'nsqsupe;': '\u22e3',
1389 'nsubE;': '\u2ac5\u0338',
1391 'nsubset;': '\u2282\u20d2',
1392 'nsubseteq;': '\u2288',
1393 'nsubseteqq;': '\u2ac5\u0338',
1395 'nsucceq;': '\u2ab0\u0338',
1397 'nsupE;': '\u2ac6\u0338',
1399 'nsupset;': '\u2283\u20d2',
1400 'nsupseteq;': '\u2289',
1401 'nsupseteqq;': '\u2ac6\u0338',
1408 'ntriangleleft;': '\u22ea',
1409 'ntrianglelefteq;': '\u22ec',
1410 'ntriangleright;': '\u22eb',
1411 'ntrianglerighteq;': '\u22ed',
1415 'numero;': '\u2116',
1417 'nvap;': '\u224d\u20d2',
1418 'nVDash;': '\u22af',
1419 'nVdash;': '\u22ae',
1420 'nvDash;': '\u22ad',
1421 'nvdash;': '\u22ac',
1422 'nvge;': '\u2265\u20d2',
1424 'nvHarr;': '\u2904',
1425 'nvinfin;': '\u29de',
1426 'nvlArr;': '\u2902',
1427 'nvle;': '\u2264\u20d2',
1429 'nvltrie;': '\u22b4\u20d2',
1430 'nvrArr;': '\u2903',
1431 'nvrtrie;': '\u22b5\u20d2',
1432 'nvsim;': '\u223c\u20d2',
1433 'nwarhk;': '\u2923',
1436 'nwarrow;': '\u2196',
1437 'nwnear;': '\u2927',
1451 'Odblac;': '\u0150',
1452 'odblac;': '\u0151',
1455 'odsold;': '\u29bc',
1459 'Ofr;': '\U0001d512',
1460 'ofr;': '\U0001d52c',
1472 'olcross;': '\u29bb',
1479 'Omicron;': '\u039f',
1480 'omicron;': '\u03bf',
1482 'ominus;': '\u2296',
1483 'Oopf;': '\U0001d546',
1484 'oopf;': '\U0001d560',
1486 'OpenCurlyDoubleQuote;': '\u201c',
1487 'OpenCurlyQuote;': '\u2018',
1495 'orderof;': '\u2134',
1500 'origof;': '\u22b6',
1502 'orslope;': '\u2a57',
1505 'Oscr;': '\U0001d4aa',
1516 'Otimes;': '\u2a37',
1517 'otimes;': '\u2297',
1518 'otimesas;': '\u2a36',
1524 'OverBar;': '\u203e',
1525 'OverBrace;': '\u23de',
1526 'OverBracket;': '\u23b4',
1527 'OverParenthesis;': '\u23dc',
1531 'parallel;': '\u2225',
1532 'parsim;': '\u2af3',
1535 'PartialD;': '\u2202',
1540 'permil;': '\u2030',
1542 'pertenk;': '\u2031',
1543 'Pfr;': '\U0001d513',
1544 'pfr;': '\U0001d52d',
1548 'phmmat;': '\u2133',
1552 'pitchfork;': '\u22d4',
1554 'planck;': '\u210f',
1555 'planckh;': '\u210e',
1556 'plankv;': '\u210f',
1558 'plusacir;': '\u2a23',
1560 'pluscir;': '\u2a22',
1561 'plusdo;': '\u2214',
1562 'plusdu;': '\u2a25',
1564 'PlusMinus;': '\xb1',
1567 'plussim;': '\u2a26',
1568 'plustwo;': '\u2a27',
1570 'Poincareplane;': '\u210c',
1571 'pointint;': '\u2a15',
1573 'popf;': '\U0001d561',
1583 'precapprox;': '\u2ab7',
1584 'preccurlyeq;': '\u227c',
1585 'Precedes;': '\u227a',
1586 'PrecedesEqual;': '\u2aaf',
1587 'PrecedesSlantEqual;': '\u227c',
1588 'PrecedesTilde;': '\u227e',
1589 'preceq;': '\u2aaf',
1590 'precnapprox;': '\u2ab9',
1591 'precneqq;': '\u2ab5',
1592 'precnsim;': '\u22e8',
1593 'precsim;': '\u227e',
1596 'primes;': '\u2119',
1599 'prnsim;': '\u22e8',
1601 'Product;': '\u220f',
1602 'profalar;': '\u232e',
1603 'profline;': '\u2312',
1604 'profsurf;': '\u2313',
1606 'Proportion;': '\u2237',
1607 'Proportional;': '\u221d',
1608 'propto;': '\u221d',
1610 'prurel;': '\u22b0',
1611 'Pscr;': '\U0001d4ab',
1612 'pscr;': '\U0001d4c5',
1615 'puncsp;': '\u2008',
1616 'Qfr;': '\U0001d514',
1617 'qfr;': '\U0001d52e',
1620 'qopf;': '\U0001d562',
1621 'qprime;': '\u2057',
1622 'Qscr;': '\U0001d4ac',
1623 'qscr;': '\U0001d4c6',
1624 'quaternions;': '\u210d',
1625 'quatint;': '\u2a16',
1627 'questeq;': '\u225f',
1633 'race;': '\u223d\u0331',
1634 'Racute;': '\u0154',
1635 'racute;': '\u0155',
1637 'raemptyv;': '\u29b3',
1642 'rangle;': '\u27e9',
1648 'rarrap;': '\u2975',
1650 'rarrbfs;': '\u2920',
1652 'rarrfs;': '\u291e',
1653 'rarrhk;': '\u21aa',
1654 'rarrlp;': '\u21ac',
1655 'rarrpl;': '\u2945',
1656 'rarrsim;': '\u2974',
1657 'Rarrtl;': '\u2916',
1658 'rarrtl;': '\u21a3',
1660 'rAtail;': '\u291c',
1661 'ratail;': '\u291a',
1663 'rationals;': '\u211a',
1671 'rbrksld;': '\u298e',
1672 'rbrkslu;': '\u2990',
1673 'Rcaron;': '\u0158',
1674 'rcaron;': '\u0159',
1675 'Rcedil;': '\u0156',
1676 'rcedil;': '\u0157',
1682 'rdldhar;': '\u2969',
1684 'rdquor;': '\u201d',
1688 'realine;': '\u211b',
1689 'realpart;': '\u211c',
1696 'ReverseElement;': '\u220b',
1697 'ReverseEquilibrium;': '\u21cb',
1698 'ReverseUpEquilibrium;': '\u296f',
1699 'rfisht;': '\u297d',
1700 'rfloor;': '\u230b',
1702 'rfr;': '\U0001d52f',
1706 'rharul;': '\u296c',
1710 'RightAngleBracket;': '\u27e9',
1711 'RightArrow;': '\u2192',
1712 'Rightarrow;': '\u21d2',
1713 'rightarrow;': '\u2192',
1714 'RightArrowBar;': '\u21e5',
1715 'RightArrowLeftArrow;': '\u21c4',
1716 'rightarrowtail;': '\u21a3',
1717 'RightCeiling;': '\u2309',
1718 'RightDoubleBracket;': '\u27e7',
1719 'RightDownTeeVector;': '\u295d',
1720 'RightDownVector;': '\u21c2',
1721 'RightDownVectorBar;': '\u2955',
1722 'RightFloor;': '\u230b',
1723 'rightharpoondown;': '\u21c1',
1724 'rightharpoonup;': '\u21c0',
1725 'rightleftarrows;': '\u21c4',
1726 'rightleftharpoons;': '\u21cc',
1727 'rightrightarrows;': '\u21c9',
1728 'rightsquigarrow;': '\u219d',
1729 'RightTee;': '\u22a2',
1730 'RightTeeArrow;': '\u21a6',
1731 'RightTeeVector;': '\u295b',
1732 'rightthreetimes;': '\u22cc',
1733 'RightTriangle;': '\u22b3',
1734 'RightTriangleBar;': '\u29d0',
1735 'RightTriangleEqual;': '\u22b5',
1736 'RightUpDownVector;': '\u294f',
1737 'RightUpTeeVector;': '\u295c',
1738 'RightUpVector;': '\u21be',
1739 'RightUpVectorBar;': '\u2954',
1740 'RightVector;': '\u21c0',
1741 'RightVectorBar;': '\u2953',
1743 'risingdotseq;': '\u2253',
1747 'rmoust;': '\u23b1',
1748 'rmoustache;': '\u23b1',
1755 'ropf;': '\U0001d563',
1756 'roplus;': '\u2a2e',
1757 'rotimes;': '\u2a35',
1758 'RoundImplies;': '\u2970',
1760 'rpargt;': '\u2994',
1761 'rppolint;': '\u2a12',
1763 'Rrightarrow;': '\u21db',
1764 'rsaquo;': '\u203a',
1766 'rscr;': '\U0001d4c7',
1771 'rsquor;': '\u2019',
1772 'rthree;': '\u22cc',
1773 'rtimes;': '\u22ca',
1777 'rtriltri;': '\u29ce',
1778 'RuleDelayed;': '\u29f4',
1779 'ruluhar;': '\u2968',
1781 'Sacute;': '\u015a',
1782 'sacute;': '\u015b',
1787 'Scaron;': '\u0160',
1788 'scaron;': '\u0161',
1792 'Scedil;': '\u015e',
1793 'scedil;': '\u015f',
1798 'scnsim;': '\u22e9',
1799 'scpolint;': '\u2a13',
1806 'searhk;': '\u2925',
1809 'searrow;': '\u2198',
1813 'seswar;': '\u2929',
1814 'setminus;': '\u2216',
1817 'Sfr;': '\U0001d516',
1818 'sfr;': '\U0001d530',
1819 'sfrown;': '\u2322',
1821 'SHCHcy;': '\u0429',
1822 'shchcy;': '\u0449',
1825 'ShortDownArrow;': '\u2193',
1826 'ShortLeftArrow;': '\u2190',
1827 'shortmid;': '\u2223',
1828 'shortparallel;': '\u2225',
1829 'ShortRightArrow;': '\u2192',
1830 'ShortUpArrow;': '\u2191',
1835 'sigmaf;': '\u03c2',
1836 'sigmav;': '\u03c2',
1838 'simdot;': '\u2a6a',
1846 'simplus;': '\u2a24',
1847 'simrarr;': '\u2972',
1849 'SmallCircle;': '\u2218',
1850 'smallsetminus;': '\u2216',
1851 'smashp;': '\u2a33',
1852 'smeparsl;': '\u29e4',
1857 'smtes;': '\u2aac\ufe00',
1858 'SOFTcy;': '\u042c',
1859 'softcy;': '\u044c',
1862 'solbar;': '\u233f',
1863 'Sopf;': '\U0001d54a',
1864 'sopf;': '\U0001d564',
1865 'spades;': '\u2660',
1866 'spadesuit;': '\u2660',
1869 'sqcaps;': '\u2293\ufe00',
1871 'sqcups;': '\u2294\ufe00',
1874 'sqsube;': '\u2291',
1875 'sqsubset;': '\u228f',
1876 'sqsubseteq;': '\u2291',
1878 'sqsupe;': '\u2292',
1879 'sqsupset;': '\u2290',
1880 'sqsupseteq;': '\u2292',
1882 'Square;': '\u25a1',
1883 'square;': '\u25a1',
1884 'SquareIntersection;': '\u2293',
1885 'SquareSubset;': '\u228f',
1886 'SquareSubsetEqual;': '\u2291',
1887 'SquareSuperset;': '\u2290',
1888 'SquareSupersetEqual;': '\u2292',
1889 'SquareUnion;': '\u2294',
1890 'squarf;': '\u25aa',
1893 'Sscr;': '\U0001d4ae',
1894 'sscr;': '\U0001d4c8',
1895 'ssetmn;': '\u2216',
1896 'ssmile;': '\u2323',
1897 'sstarf;': '\u22c6',
1901 'straightepsilon;': '\u03f5',
1902 'straightphi;': '\u03d5',
1906 'subdot;': '\u2abd',
1909 'subedot;': '\u2ac3',
1910 'submult;': '\u2ac1',
1913 'subplus;': '\u2abf',
1914 'subrarr;': '\u2979',
1915 'Subset;': '\u22d0',
1916 'subset;': '\u2282',
1917 'subseteq;': '\u2286',
1918 'subseteqq;': '\u2ac5',
1919 'SubsetEqual;': '\u2286',
1920 'subsetneq;': '\u228a',
1921 'subsetneqq;': '\u2acb',
1922 'subsim;': '\u2ac7',
1923 'subsub;': '\u2ad5',
1924 'subsup;': '\u2ad3',
1926 'succapprox;': '\u2ab8',
1927 'succcurlyeq;': '\u227d',
1928 'Succeeds;': '\u227b',
1929 'SucceedsEqual;': '\u2ab0',
1930 'SucceedsSlantEqual;': '\u227d',
1931 'SucceedsTilde;': '\u227f',
1932 'succeq;': '\u2ab0',
1933 'succnapprox;': '\u2aba',
1934 'succneqq;': '\u2ab6',
1935 'succnsim;': '\u22e9',
1936 'succsim;': '\u227f',
1937 'SuchThat;': '\u220b',
1949 'supdot;': '\u2abe',
1950 'supdsub;': '\u2ad8',
1953 'supedot;': '\u2ac4',
1954 'Superset;': '\u2283',
1955 'SupersetEqual;': '\u2287',
1956 'suphsol;': '\u27c9',
1957 'suphsub;': '\u2ad7',
1958 'suplarr;': '\u297b',
1959 'supmult;': '\u2ac2',
1962 'supplus;': '\u2ac0',
1963 'Supset;': '\u22d1',
1964 'supset;': '\u2283',
1965 'supseteq;': '\u2287',
1966 'supseteqq;': '\u2ac6',
1967 'supsetneq;': '\u228b',
1968 'supsetneqq;': '\u2acc',
1969 'supsim;': '\u2ac8',
1970 'supsub;': '\u2ad4',
1971 'supsup;': '\u2ad6',
1972 'swarhk;': '\u2926',
1975 'swarrow;': '\u2199',
1976 'swnwar;': '\u292a',
1980 'target;': '\u2316',
1984 'Tcaron;': '\u0164',
1985 'tcaron;': '\u0165',
1986 'Tcedil;': '\u0162',
1987 'tcedil;': '\u0163',
1991 'telrec;': '\u2315',
1992 'Tfr;': '\U0001d517',
1993 'tfr;': '\U0001d531',
1994 'there4;': '\u2234',
1995 'Therefore;': '\u2234',
1996 'therefore;': '\u2234',
1999 'thetasym;': '\u03d1',
2000 'thetav;': '\u03d1',
2001 'thickapprox;': '\u2248',
2002 'thicksim;': '\u223c',
2003 'ThickSpace;': '\u205f\u200a',
2004 'thinsp;': '\u2009',
2005 'ThinSpace;': '\u2009',
2007 'thksim;': '\u223c',
2014 'TildeEqual;': '\u2243',
2015 'TildeFullEqual;': '\u2245',
2016 'TildeTilde;': '\u2248',
2019 'timesb;': '\u22a0',
2020 'timesbar;': '\u2a31',
2021 'timesd;': '\u2a30',
2025 'topbot;': '\u2336',
2026 'topcir;': '\u2af1',
2027 'Topf;': '\U0001d54b',
2028 'topf;': '\U0001d565',
2029 'topfork;': '\u2ada',
2031 'tprime;': '\u2034',
2034 'triangle;': '\u25b5',
2035 'triangledown;': '\u25bf',
2036 'triangleleft;': '\u25c3',
2037 'trianglelefteq;': '\u22b4',
2038 'triangleq;': '\u225c',
2039 'triangleright;': '\u25b9',
2040 'trianglerighteq;': '\u22b5',
2041 'tridot;': '\u25ec',
2043 'triminus;': '\u2a3a',
2044 'TripleDot;': '\u20db',
2045 'triplus;': '\u2a39',
2047 'tritime;': '\u2a3b',
2048 'trpezium;': '\u23e2',
2049 'Tscr;': '\U0001d4af',
2050 'tscr;': '\U0001d4c9',
2055 'Tstrok;': '\u0166',
2056 'tstrok;': '\u0167',
2058 'twoheadleftarrow;': '\u219e',
2059 'twoheadrightarrow;': '\u21a0',
2067 'Uarrocir;': '\u2949',
2070 'Ubreve;': '\u016c',
2071 'ubreve;': '\u016d',
2079 'Udblac;': '\u0170',
2080 'udblac;': '\u0171',
2082 'ufisht;': '\u297e',
2083 'Ufr;': '\U0001d518',
2084 'ufr;': '\U0001d532',
2093 'ulcorn;': '\u231c',
2094 'ulcorner;': '\u231c',
2095 'ulcrop;': '\u230f',
2102 'UnderBrace;': '\u23df',
2103 'UnderBracket;': '\u23b5',
2104 'UnderParenthesis;': '\u23dd',
2106 'UnionPlus;': '\u228e',
2109 'Uopf;': '\U0001d54c',
2110 'uopf;': '\U0001d566',
2111 'UpArrow;': '\u2191',
2112 'Uparrow;': '\u21d1',
2113 'uparrow;': '\u2191',
2114 'UpArrowBar;': '\u2912',
2115 'UpArrowDownArrow;': '\u21c5',
2116 'UpDownArrow;': '\u2195',
2117 'Updownarrow;': '\u21d5',
2118 'updownarrow;': '\u2195',
2119 'UpEquilibrium;': '\u296e',
2120 'upharpoonleft;': '\u21bf',
2121 'upharpoonright;': '\u21be',
2123 'UpperLeftArrow;': '\u2196',
2124 'UpperRightArrow;': '\u2197',
2128 'Upsilon;': '\u03a5',
2129 'upsilon;': '\u03c5',
2131 'UpTeeArrow;': '\u21a5',
2132 'upuparrows;': '\u21c8',
2133 'urcorn;': '\u231d',
2134 'urcorner;': '\u231d',
2135 'urcrop;': '\u230e',
2139 'Uscr;': '\U0001d4b0',
2140 'uscr;': '\U0001d4ca',
2142 'Utilde;': '\u0168',
2143 'utilde;': '\u0169',
2151 'uwangle;': '\u29a7',
2152 'vangrt;': '\u299c',
2153 'varepsilon;': '\u03f5',
2154 'varkappa;': '\u03f0',
2155 'varnothing;': '\u2205',
2156 'varphi;': '\u03d5',
2158 'varpropto;': '\u221d',
2161 'varrho;': '\u03f1',
2162 'varsigma;': '\u03c2',
2163 'varsubsetneq;': '\u228a\ufe00',
2164 'varsubsetneqq;': '\u2acb\ufe00',
2165 'varsupsetneq;': '\u228b\ufe00',
2166 'varsupsetneqq;': '\u2acc\ufe00',
2167 'vartheta;': '\u03d1',
2168 'vartriangleleft;': '\u22b2',
2169 'vartriangleright;': '\u22b3',
2179 'Vdashl;': '\u2ae6',
2182 'veebar;': '\u22bb',
2184 'vellip;': '\u22ee',
2185 'Verbar;': '\u2016',
2189 'VerticalBar;': '\u2223',
2190 'VerticalLine;': '|',
2191 'VerticalSeparator;': '\u2758',
2192 'VerticalTilde;': '\u2240',
2193 'VeryThinSpace;': '\u200a',
2194 'Vfr;': '\U0001d519',
2195 'vfr;': '\U0001d533',
2197 'vnsub;': '\u2282\u20d2',
2198 'vnsup;': '\u2283\u20d2',
2199 'Vopf;': '\U0001d54d',
2200 'vopf;': '\U0001d567',
2203 'Vscr;': '\U0001d4b1',
2204 'vscr;': '\U0001d4cb',
2205 'vsubnE;': '\u2acb\ufe00',
2206 'vsubne;': '\u228a\ufe00',
2207 'vsupnE;': '\u2acc\ufe00',
2208 'vsupne;': '\u228b\ufe00',
2209 'Vvdash;': '\u22aa',
2210 'vzigzag;': '\u299a',
2213 'wedbar;': '\u2a5f',
2216 'wedgeq;': '\u2259',
2217 'weierp;': '\u2118',
2218 'Wfr;': '\U0001d51a',
2219 'wfr;': '\U0001d534',
2220 'Wopf;': '\U0001d54e',
2221 'wopf;': '\U0001d568',
2224 'wreath;': '\u2240',
2225 'Wscr;': '\U0001d4b2',
2226 'wscr;': '\U0001d4cc',
2231 'Xfr;': '\U0001d51b',
2232 'xfr;': '\U0001d535',
2242 'Xopf;': '\U0001d54f',
2243 'xopf;': '\U0001d569',
2244 'xoplus;': '\u2a01',
2245 'xotime;': '\u2a02',
2248 'Xscr;': '\U0001d4b3',
2249 'xscr;': '\U0001d4cd',
2250 'xsqcup;': '\u2a06',
2251 'xuplus;': '\u2a04',
2254 'xwedge;': '\u22c0',
2267 'Yfr;': '\U0001d51c',
2268 'yfr;': '\U0001d536',
2271 'Yopf;': '\U0001d550',
2272 'yopf;': '\U0001d56a',
2273 'Yscr;': '\U0001d4b4',
2274 'yscr;': '\U0001d4ce',
2280 'Zacute;': '\u0179',
2281 'zacute;': '\u017a',
2282 'Zcaron;': '\u017d',
2283 'zcaron;': '\u017e',
2288 'zeetrf;': '\u2128',
2289 'ZeroWidthSpace;': '\u200b',
2293 'zfr;': '\U0001d537',
2296 'zigrarr;': '\u21dd',
2298 'zopf;': '\U0001d56b',
2299 'Zscr;': '\U0001d4b5',
2300 'zscr;': '\U0001d4cf',
2306 import http
.client
as compat_http_client
2307 except ImportError: # Python 2
2308 import httplib
as compat_http_client
2311 from urllib
.error
import HTTPError
as compat_HTTPError
2312 except ImportError: # Python 2
2313 from urllib2
import HTTPError
as compat_HTTPError
2316 from urllib
.request
import urlretrieve
as compat_urlretrieve
2317 except ImportError: # Python 2
2318 from urllib
import urlretrieve
as compat_urlretrieve
2321 from html
.parser
import HTMLParser
as compat_HTMLParser
2322 except ImportError: # Python 2
2323 from HTMLParser
import HTMLParser
as compat_HTMLParser
2326 from subprocess
import DEVNULL
2327 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2329 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2332 import http
.server
as compat_http_server
2334 import BaseHTTPServer
as compat_http_server
2337 compat_str
= unicode # Python 2
2342 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2343 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2344 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2345 except ImportError: # Python 2
2346 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2347 else re
.compile('([\x00-\x7f]+)'))
2349 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2350 # implementations from cpython 3.4.3's stdlib. Python 2's version
2351 # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
2353 def compat_urllib_parse_unquote_to_bytes(string
):
2354 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2355 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2356 # unescaped non-ASCII characters, which URIs should not.
2358 # Is it a string-like object?
2361 if isinstance(string
, compat_str
):
2362 string
= string
.encode('utf-8')
2363 bits
= string
.split(b
'%')
2368 for item
in bits
[1:]:
2370 append(compat_urllib_parse
._hextochr
[item
[:2]])
2375 return b
''.join(res
)
2377 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2378 """Replace %xx escapes by their single-character equivalent. The optional
2379 encoding and errors parameters specify how to decode percent-encoded
2380 sequences into Unicode characters, as accepted by the bytes.decode()
2382 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2383 sequences are replaced by a placeholder character.
2385 unquote('abc%20def') -> 'abc def'.
2387 if '%' not in string
:
2390 if encoding
is None:
2394 bits
= _asciire
.split(string
)
2397 for i
in range(1, len(bits
), 2):
2398 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
2402 def compat_urllib_parse_unquote_plus(string
, encoding
='utf-8', errors
='replace'):
2403 """Like unquote(), but also replace plus signs by spaces, as required for
2404 unquoting HTML form values.
2406 unquote_plus('%7e/abc+def') -> '~/abc def'
2408 string
= string
.replace('+', ' ')
2409 return compat_urllib_parse_unquote(string
, encoding
, errors
)
2412 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2413 except ImportError: # Python 2
2414 # Python 2 will choke in urlencode on mixture of byte and unicode strings.
2415 # Possible solutions are to either port it from python 3 with all
2416 # the friends or manually ensure input query contains only byte strings.
2417 # We will stick with latter thus recursively encoding the whole query.
2418 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2420 if isinstance(e
, dict):
2422 elif isinstance(e
, (list, tuple,)):
2423 list_e
= encode_list(e
)
2424 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2425 elif isinstance(e
, compat_str
):
2426 e
= e
.encode(encoding
)
2430 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2433 return [encode_elem(e
) for e
in l
]
2435 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2438 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2439 except ImportError: # Python < 3.4
2440 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2441 class compat_urllib_request_DataHandler(compat_urllib_request
.BaseHandler
):
2442 def data_open(self
, req
):
2443 # data URLs as specified in RFC 2397.
2445 # ignores POSTed data
2448 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2449 # mediatype := [ type "/" subtype ] *( ";" parameter )
2451 # parameter := attribute "=" value
2452 url
= req
.get_full_url()
2454 scheme
, data
= url
.split(':', 1)
2455 mediatype
, data
= data
.split(',', 1)
2457 # even base64 encoded data URLs might be quoted so unquote in any case:
2458 data
= compat_urllib_parse_unquote_to_bytes(data
)
2459 if mediatype
.endswith(';base64'):
2460 data
= binascii
.a2b_base64(data
)
2461 mediatype
= mediatype
[:-7]
2464 mediatype
= 'text/plain;charset=US-ASCII'
2466 headers
= email
.message_from_string(
2467 'Content-type: %s\nContent-length: %d\n' % (mediatype
, len(data
)))
2469 return compat_urllib_response
.addinfourl(io
.BytesIO(data
), headers
, url
)
2472 compat_basestring
= basestring
# Python 2
2474 compat_basestring
= str
2477 compat_chr
= unichr # Python 2
2482 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2483 except ImportError: # Python 2.6
2484 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2487 etree
= xml
.etree
.ElementTree
2490 class _TreeBuilder(etree
.TreeBuilder
):
2491 def doctype(self
, name
, pubid
, system
):
2494 if sys
.version_info
[0] >= 3:
2495 def compat_etree_fromstring(text
):
2496 return etree
.XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder()))
2498 # python 2.x tries to encode unicode strings with ascii (see the
2499 # XMLParser._fixtext method)
2501 _etree_iter
= etree
.Element
.iter
2502 except AttributeError: # Python <=2.6
2503 def _etree_iter(root
):
2504 for el
in root
.findall('*'):
2506 for sub
in _etree_iter(el
):
2509 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2511 def _XML(text
, parser
=None):
2513 parser
= etree
.XMLParser(target
=_TreeBuilder())
2515 return parser
.close()
2517 def _element_factory(*args
, **kwargs
):
2518 el
= etree
.Element(*args
, **kwargs
)
2519 for k
, v
in el
.items():
2520 if isinstance(v
, bytes):
2521 el
.set(k
, v
.decode('utf-8'))
2524 def compat_etree_fromstring(text
):
2525 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2526 for el
in _etree_iter(doc
):
2527 if el
.text
is not None and isinstance(el
.text
, bytes):
2528 el
.text
= el
.text
.decode('utf-8')
2531 if sys
.version_info
< (2, 7):
2532 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2533 # .//node does not match if a node is a direct child of . !
2534 def compat_xpath(xpath
):
2535 if isinstance(xpath
, compat_str
):
2536 xpath
= xpath
.encode('ascii')
2539 compat_xpath
= lambda xpath
: xpath
2542 from urllib
.parse
import parse_qs
as compat_parse_qs
2543 except ImportError: # Python 2
2544 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2545 # Python 2's version is apparently totally broken
2547 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2548 encoding
='utf-8', errors
='replace'):
2549 qs
, _coerce_result
= qs
, compat_str
2550 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2552 for name_value
in pairs
:
2553 if not name_value
and not strict_parsing
:
2555 nv
= name_value
.split('=', 1)
2558 raise ValueError('bad query field: %r' % (name_value
,))
2559 # Handle case of a control-name with no equal sign
2560 if keep_blank_values
:
2564 if len(nv
[1]) or keep_blank_values
:
2565 name
= nv
[0].replace('+', ' ')
2566 name
= compat_urllib_parse_unquote(
2567 name
, encoding
=encoding
, errors
=errors
)
2568 name
= _coerce_result(name
)
2569 value
= nv
[1].replace('+', ' ')
2570 value
= compat_urllib_parse_unquote(
2571 value
, encoding
=encoding
, errors
=errors
)
2572 value
= _coerce_result(value
)
2573 r
.append((name
, value
))
2576 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
2577 encoding
='utf-8', errors
='replace'):
2579 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
2580 encoding
=encoding
, errors
=errors
)
2581 for name
, value
in pairs
:
2582 if name
in parsed_result
:
2583 parsed_result
[name
].append(value
)
2585 parsed_result
[name
] = [value
]
2586 return parsed_result
2589 from shlex
import quote
as compat_shlex_quote
2590 except ImportError: # Python < 3.3
2591 def compat_shlex_quote(s
):
2592 if re
.match(r
'^[-_\w./]+$', s
):
2595 return "'" + s
.replace("'", "'\"'\"'") + "'"
2599 args
= shlex
.split('äøę')
2600 assert (isinstance(args
, list) and
2601 isinstance(args
[0], compat_str
) and
2602 args
[0] == 'äøę')
2603 compat_shlex_split
= shlex
.split
2604 except (AssertionError, UnicodeEncodeError):
2605 # Working around shlex issue with unicode strings on some python 2
2606 # versions (see http://bugs.python.org/issue1548891)
2607 def compat_shlex_split(s
, comments
=False, posix
=True):
2608 if isinstance(s
, compat_str
):
2609 s
= s
.encode('utf-8')
2610 return list(map(lambda s
: s
.decode('utf-8'), shlex
.split(s
, comments
, posix
)))
2620 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2623 if sys
.version_info
>= (3, 0):
2624 compat_getenv
= os
.getenv
2625 compat_expanduser
= os
.path
.expanduser
2627 def compat_setenv(key
, value
, env
=os
.environ
):
2630 # Environment variables should be decoded with filesystem encoding.
2631 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2633 def compat_getenv(key
, default
=None):
2634 from .utils
import get_filesystem_encoding
2635 env
= os
.getenv(key
, default
)
2637 env
= env
.decode(get_filesystem_encoding())
2640 def compat_setenv(key
, value
, env
=os
.environ
):
2642 from .utils
import get_filesystem_encoding
2643 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2644 env
[encode(key
)] = encode(value
)
2646 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2647 # environment variables with filesystem encoding. We will work around this by
2648 # providing adjusted implementations.
2649 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2650 # for different platforms with correct environment variables decoding.
2652 if compat_os_name
== 'posix':
2653 def compat_expanduser(path
):
2654 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2656 if not path
.startswith('~'):
2658 i
= path
.find('/', 1)
2662 if 'HOME' not in os
.environ
:
2664 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2666 userhome
= compat_getenv('HOME')
2670 pwent
= pwd
.getpwnam(path
[1:i
])
2673 userhome
= pwent
.pw_dir
2674 userhome
= userhome
.rstrip('/')
2675 return (userhome
+ path
[i
:]) or '/'
2676 elif compat_os_name
== 'nt' or compat_os_name
== 'ce':
2677 def compat_expanduser(path
):
2678 """Expand ~ and ~user constructs.
2680 If user or $HOME is unknown, do nothing."""
2684 while i
< n
and path
[i
] not in '/\\':
2687 if 'HOME' in os
.environ
:
2688 userhome
= compat_getenv('HOME')
2689 elif 'USERPROFILE' in os
.environ
:
2690 userhome
= compat_getenv('USERPROFILE')
2691 elif 'HOMEPATH' not in os
.environ
:
2695 drive
= compat_getenv('HOMEDRIVE')
2698 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2701 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2703 return userhome
+ path
[i
:]
2705 compat_expanduser
= os
.path
.expanduser
2708 if sys
.version_info
< (3, 0):
2709 def compat_print(s
):
2710 from .utils
import preferredencoding
2711 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
2713 def compat_print(s
):
2714 assert isinstance(s
, compat_str
)
2718 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
2719 def compat_getpass(prompt
, *args
, **kwargs
):
2720 if isinstance(prompt
, compat_str
):
2721 from .utils
import preferredencoding
2722 prompt
= prompt
.encode(preferredencoding())
2723 return getpass
.getpass(prompt
, *args
, **kwargs
)
2725 compat_getpass
= getpass
.getpass
2728 compat_input
= raw_input
2729 except NameError: # Python 3
2730 compat_input
= input
2732 # Python < 2.6.5 require kwargs to be bytes
2736 _testfunc(**{'x': 0})
2738 def compat_kwargs(kwargs
):
2739 return dict((bytes(k
), v
) for k
, v
in kwargs
.items())
2741 compat_kwargs
= lambda kwargs
: kwargs
2744 if sys
.version_info
< (2, 7):
2745 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2746 host
, port
= address
2748 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2749 af
, socktype
, proto
, canonname
, sa
= res
2752 sock
= socket
.socket(af
, socktype
, proto
)
2753 sock
.settimeout(timeout
)
2755 sock
.bind(source_address
)
2758 except socket
.error
as _
:
2760 if sock
is not None:
2765 raise socket
.error('getaddrinfo returns an empty list')
2767 compat_socket_create_connection
= socket
.create_connection
2770 # Fix https://github.com/rg3/youtube-dl/issues/4223
2771 # See http://bugs.python.org/issue9161 for what is broken
2772 def workaround_optparse_bug9161():
2773 op
= optparse
.OptionParser()
2774 og
= optparse
.OptionGroup(op
, 'foo')
2778 real_add_option
= optparse
.OptionGroup
.add_option
2780 def _compat_add_option(self
, *args
, **kwargs
):
2782 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2784 bargs
= [enc(a
) for a
in args
]
2786 (k
, enc(v
)) for k
, v
in kwargs
.items())
2787 return real_add_option(self
, *bargs
, **bkwargs
)
2788 optparse
.OptionGroup
.add_option
= _compat_add_option
2790 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2791 compat_get_terminal_size
= shutil
.get_terminal_size
2793 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2795 def compat_get_terminal_size(fallback
=(80, 24)):
2796 columns
= compat_getenv('COLUMNS')
2798 columns
= int(columns
)
2801 lines
= compat_getenv('LINES')
2807 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2809 sp
= subprocess
.Popen(
2811 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2812 out
, err
= sp
.communicate()
2813 _lines
, _columns
= map(int, out
.split())
2815 _columns
, _lines
= _terminal_size(*fallback
)
2817 if columns
is None or columns
<= 0:
2819 if lines
is None or lines
<= 0:
2821 return _terminal_size(columns
, lines
)
2824 itertools
.count(start
=0, step
=1)
2825 compat_itertools_count
= itertools
.count
2826 except TypeError: # Python 2.6
2827 def compat_itertools_count(start
=0, step
=1):
2833 if sys
.version_info
>= (3, 0):
2834 from tokenize
import tokenize
as compat_tokenize_tokenize
2836 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2840 struct
.pack('!I', 0)
2842 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2843 # See https://bugs.python.org/issue19099
2844 def compat_struct_pack(spec
, *args
):
2845 if isinstance(spec
, compat_str
):
2846 spec
= spec
.encode('ascii')
2847 return struct
.pack(spec
, *args
)
2849 def compat_struct_unpack(spec
, *args
):
2850 if isinstance(spec
, compat_str
):
2851 spec
= spec
.encode('ascii')
2852 return struct
.unpack(spec
, *args
)
2854 compat_struct_pack
= struct
.pack
2855 compat_struct_unpack
= struct
.unpack
2859 'compat_HTMLParser',
2861 'compat_basestring',
2865 'compat_etree_fromstring',
2866 'compat_expanduser',
2867 'compat_get_terminal_size',
2870 'compat_html_entities',
2871 'compat_html_entities_html5',
2872 'compat_http_client',
2873 'compat_http_server',
2875 'compat_itertools_count',
2882 'compat_shlex_quote',
2883 'compat_shlex_split',
2884 'compat_socket_create_connection',
2886 'compat_struct_pack',
2887 'compat_struct_unpack',
2888 'compat_subprocess_get_DEVNULL',
2889 'compat_tokenize_tokenize',
2890 'compat_urllib_error',
2891 'compat_urllib_parse',
2892 'compat_urllib_parse_unquote',
2893 'compat_urllib_parse_unquote_plus',
2894 'compat_urllib_parse_unquote_to_bytes',
2895 'compat_urllib_parse_urlencode',
2896 'compat_urllib_parse_urlparse',
2897 'compat_urllib_request',
2898 'compat_urllib_request_DataHandler',
2899 'compat_urllib_response',
2901 'compat_urlretrieve',
2902 'compat_xml_parse_error',
2904 'workaround_optparse_bug9161',