2 from __future__
import unicode_literals
19 import xml
.etree
.ElementTree
23 import urllib
.request
as compat_urllib_request
24 except ImportError: # Python 2
25 import urllib2
as compat_urllib_request
28 import urllib
.error
as compat_urllib_error
29 except ImportError: # Python 2
30 import urllib2
as compat_urllib_error
33 import urllib
.parse
as compat_urllib_parse
34 except ImportError: # Python 2
35 import urllib
as compat_urllib_parse
38 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
39 except ImportError: # Python 2
40 from urlparse
import urlparse
as compat_urllib_parse_urlparse
43 import urllib
.parse
as compat_urlparse
44 except ImportError: # Python 2
45 import urlparse
as compat_urlparse
48 import urllib
.response
as compat_urllib_response
49 except ImportError: # Python 2
50 import urllib
as compat_urllib_response
53 import http
.cookiejar
as compat_cookiejar
54 except ImportError: # Python 2
55 import cookielib
as compat_cookiejar
58 import http
.cookies
as compat_cookies
59 except ImportError: # Python 2
60 import Cookie
as compat_cookies
63 import html
.entities
as compat_html_entities
64 except ImportError: # Python 2
65 import htmlentitydefs
as compat_html_entities
68 compat_html_entities_html5
= compat_html_entities
.html5
69 except AttributeError:
70 # Copied from CPython 3.5.1 html/entities.py
71 compat_html_entities_html5
= {
80 'acE;': '\u223e\u0333',
100 'alefsym;': '\u2135',
115 'andslope;': '\u2a58',
121 'angmsdaa;': '\u29a8',
122 'angmsdab;': '\u29a9',
123 'angmsdac;': '\u29aa',
124 'angmsdad;': '\u29ab',
125 'angmsdae;': '\u29ac',
126 'angmsdaf;': '\u29ad',
127 'angmsdag;': '\u29ae',
128 'angmsdah;': '\u29af',
130 'angrtvb;': '\u22be',
131 'angrtvbd;': '\u299d',
134 'angzarr;': '\u237c',
137 'Aopf;': '\U0001d538',
138 'aopf;': '\U0001d552',
145 'ApplyFunction;': '\u2061',
147 'approxeq;': '\u224a',
152 'Ascr;': '\U0001d49c',
153 'ascr;': '\U0001d4b6',
157 'asympeq;': '\u224d',
166 'awconint;': '\u2233',
168 'backcong;': '\u224c',
169 'backepsilon;': '\u03f6',
170 'backprime;': '\u2035',
171 'backsim;': '\u223d',
172 'backsimeq;': '\u22cd',
173 'Backslash;': '\u2216',
178 'barwedge;': '\u2305',
180 'bbrktbrk;': '\u23b6',
186 'Because;': '\u2235',
187 'because;': '\u2235',
188 'bemptyv;': '\u29b0',
191 'Bernoullis;': '\u212c',
195 'between;': '\u226c',
196 'Bfr;': '\U0001d505',
197 'bfr;': '\U0001d51f',
199 'bigcirc;': '\u25ef',
201 'bigodot;': '\u2a00',
202 'bigoplus;': '\u2a01',
203 'bigotimes;': '\u2a02',
204 'bigsqcup;': '\u2a06',
205 'bigstar;': '\u2605',
206 'bigtriangledown;': '\u25bd',
207 'bigtriangleup;': '\u25b3',
208 'biguplus;': '\u2a04',
210 'bigwedge;': '\u22c0',
212 'blacklozenge;': '\u29eb',
213 'blacksquare;': '\u25aa',
214 'blacktriangle;': '\u25b4',
215 'blacktriangledown;': '\u25be',
216 'blacktriangleleft;': '\u25c2',
217 'blacktriangleright;': '\u25b8',
224 'bnequiv;': '\u2261\u20e5',
227 'Bopf;': '\U0001d539',
228 'bopf;': '\U0001d553',
251 'boxminus;': '\u229f',
252 'boxplus;': '\u229e',
253 'boxtimes;': '\u22a0',
282 'bscr;': '\U0001d4b7',
288 'bsolhsub;': '\u27c8',
301 'capbrcup;': '\u2a49',
305 'CapitalDifferentialD;': '\u2145',
306 'caps;': '\u2229\ufe00',
309 'Cayleys;': '\u212d',
319 'Cconint;': '\u2230',
321 'ccupssm;': '\u2a50',
327 'cemptyv;': '\u29b2',
330 'CenterDot;': '\xb7',
331 'centerdot;': '\xb7',
333 'cfr;': '\U0001d520',
337 'checkmark;': '\u2713',
343 'circlearrowleft;': '\u21ba',
344 'circlearrowright;': '\u21bb',
345 'circledast;': '\u229b',
346 'circledcirc;': '\u229a',
347 'circleddash;': '\u229d',
348 'CircleDot;': '\u2299',
350 'circledS;': '\u24c8',
351 'CircleMinus;': '\u2296',
352 'CirclePlus;': '\u2295',
353 'CircleTimes;': '\u2297',
356 'cirfnint;': '\u2a10',
358 'cirscir;': '\u29c2',
359 'ClockwiseContourIntegral;': '\u2232',
360 'CloseCurlyDoubleQuote;': '\u201d',
361 'CloseCurlyQuote;': '\u2019',
363 'clubsuit;': '\u2663',
368 'coloneq;': '\u2254',
373 'complement;': '\u2201',
374 'complexes;': '\u2102',
376 'congdot;': '\u2a6d',
377 'Congruent;': '\u2261',
380 'ContourIntegral;': '\u222e',
382 'copf;': '\U0001d554',
384 'Coproduct;': '\u2210',
390 'CounterClockwiseContourIntegral;': '\u2233',
394 'Cscr;': '\U0001d49e',
395 'cscr;': '\U0001d4b8',
401 'cudarrl;': '\u2938',
402 'cudarrr;': '\u2935',
406 'cularrp;': '\u293d',
409 'cupbrcap;': '\u2a48',
415 'cups;': '\u222a\ufe00',
417 'curarrm;': '\u293c',
418 'curlyeqprec;': '\u22de',
419 'curlyeqsucc;': '\u22df',
420 'curlyvee;': '\u22ce',
421 'curlywedge;': '\u22cf',
424 'curvearrowleft;': '\u21b6',
425 'curvearrowright;': '\u21b7',
428 'cwconint;': '\u2232',
440 'dbkarow;': '\u290f',
448 'ddagger;': '\u2021',
450 'DDotrahd;': '\u2911',
451 'ddotseq;': '\u2a77',
457 'demptyv;': '\u29b1',
459 'Dfr;': '\U0001d507',
460 'dfr;': '\U0001d521',
464 'DiacriticalAcute;': '\xb4',
465 'DiacriticalDot;': '\u02d9',
466 'DiacriticalDoubleAcute;': '\u02dd',
467 'DiacriticalGrave;': '`',
468 'DiacriticalTilde;': '\u02dc',
470 'Diamond;': '\u22c4',
471 'diamond;': '\u22c4',
472 'diamondsuit;': '\u2666',
475 'DifferentialD;': '\u2146',
476 'digamma;': '\u03dd',
481 'divideontimes;': '\u22c7',
488 'Dopf;': '\U0001d53b',
489 'dopf;': '\U0001d555',
494 'doteqdot;': '\u2251',
495 'DotEqual;': '\u2250',
496 'dotminus;': '\u2238',
497 'dotplus;': '\u2214',
498 'dotsquare;': '\u22a1',
499 'doublebarwedge;': '\u2306',
500 'DoubleContourIntegral;': '\u222f',
501 'DoubleDot;': '\xa8',
502 'DoubleDownArrow;': '\u21d3',
503 'DoubleLeftArrow;': '\u21d0',
504 'DoubleLeftRightArrow;': '\u21d4',
505 'DoubleLeftTee;': '\u2ae4',
506 'DoubleLongLeftArrow;': '\u27f8',
507 'DoubleLongLeftRightArrow;': '\u27fa',
508 'DoubleLongRightArrow;': '\u27f9',
509 'DoubleRightArrow;': '\u21d2',
510 'DoubleRightTee;': '\u22a8',
511 'DoubleUpArrow;': '\u21d1',
512 'DoubleUpDownArrow;': '\u21d5',
513 'DoubleVerticalBar;': '\u2225',
514 'DownArrow;': '\u2193',
515 'Downarrow;': '\u21d3',
516 'downarrow;': '\u2193',
517 'DownArrowBar;': '\u2913',
518 'DownArrowUpArrow;': '\u21f5',
519 'DownBreve;': '\u0311',
520 'downdownarrows;': '\u21ca',
521 'downharpoonleft;': '\u21c3',
522 'downharpoonright;': '\u21c2',
523 'DownLeftRightVector;': '\u2950',
524 'DownLeftTeeVector;': '\u295e',
525 'DownLeftVector;': '\u21bd',
526 'DownLeftVectorBar;': '\u2956',
527 'DownRightTeeVector;': '\u295f',
528 'DownRightVector;': '\u21c1',
529 'DownRightVectorBar;': '\u2957',
530 'DownTee;': '\u22a4',
531 'DownTeeArrow;': '\u21a7',
532 'drbkarow;': '\u2910',
535 'Dscr;': '\U0001d49f',
536 'dscr;': '\U0001d4b9',
547 'dwangle;': '\u29a6',
550 'dzigrarr;': '\u27ff',
572 'Efr;': '\U0001d508',
573 'efr;': '\U0001d522',
582 'Element;': '\u2208',
583 'elinters;': '\u23e7',
590 'emptyset;': '\u2205',
591 'EmptySmallSquare;': '\u25fb',
593 'EmptyVerySmallSquare;': '\u25ab',
602 'Eopf;': '\U0001d53c',
603 'eopf;': '\U0001d556',
608 'Epsilon;': '\u0395',
609 'epsilon;': '\u03b5',
612 'eqcolon;': '\u2255',
614 'eqslantgtr;': '\u2a96',
615 'eqslantless;': '\u2a95',
618 'EqualTilde;': '\u2242',
620 'Equilibrium;': '\u21cc',
622 'equivDD;': '\u2a78',
623 'eqvparsl;': '\u29e5',
645 'expectation;': '\u2130',
646 'ExponentialE;': '\u2147',
647 'exponentiale;': '\u2147',
648 'fallingdotseq;': '\u2252',
655 'Ffr;': '\U0001d509',
656 'ffr;': '\U0001d523',
658 'FilledSmallSquare;': '\u25fc',
659 'FilledVerySmallSquare;': '\u25aa',
665 'Fopf;': '\U0001d53d',
666 'fopf;': '\U0001d557',
671 'Fouriertrf;': '\u2131',
672 'fpartint;': '\u2a0d',
694 'fscr;': '\U0001d4bb',
716 'geqslant;': '\u2a7e',
720 'gesdoto;': '\u2a82',
721 'gesdotol;': '\u2a84',
722 'gesl;': '\u22db\ufe00',
724 'Gfr;': '\U0001d50a',
725 'gfr;': '\U0001d524',
737 'gnapprox;': '\u2a8a',
743 'Gopf;': '\U0001d53e',
744 'gopf;': '\U0001d558',
746 'GreaterEqual;': '\u2265',
747 'GreaterEqualLess;': '\u22db',
748 'GreaterFullEqual;': '\u2267',
749 'GreaterGreater;': '\u2aa2',
750 'GreaterLess;': '\u2277',
751 'GreaterSlantEqual;': '\u2a7e',
752 'GreaterTilde;': '\u2273',
753 'Gscr;': '\U0001d4a2',
767 'gtquest;': '\u2a7c',
768 'gtrapprox;': '\u2a86',
771 'gtreqless;': '\u22db',
772 'gtreqqless;': '\u2a8c',
773 'gtrless;': '\u2277',
775 'gvertneqq;': '\u2269\ufe00',
776 'gvnE;': '\u2269\ufe00',
785 'harrcir;': '\u2948',
792 'heartsuit;': '\u2665',
796 'hfr;': '\U0001d525',
797 'HilbertSpace;': '\u210b',
798 'hksearow;': '\u2925',
799 'hkswarow;': '\u2926',
802 'hookleftarrow;': '\u21a9',
803 'hookrightarrow;': '\u21aa',
805 'hopf;': '\U0001d559',
807 'HorizontalLine;': '\u2500',
809 'hscr;': '\U0001d4bd',
813 'HumpDownHump;': '\u224e',
814 'HumpEqual;': '\u224f',
835 'ifr;': '\U0001d526',
851 'ImaginaryI;': '\u2148',
852 'imagline;': '\u2110',
853 'imagpart;': '\u2111',
857 'Implies;': '\u21d2',
861 'infintie;': '\u29dd',
866 'integers;': '\u2124',
867 'Integral;': '\u222b',
868 'intercal;': '\u22ba',
869 'Intersection;': '\u22c2',
870 'intlarhk;': '\u2a17',
871 'intprod;': '\u2a3c',
872 'InvisibleComma;': '\u2063',
873 'InvisibleTimes;': '\u2062',
878 'Iopf;': '\U0001d540',
879 'iopf;': '\U0001d55a',
886 'iscr;': '\U0001d4be',
888 'isindot;': '\u22f5',
906 'Jfr;': '\U0001d50d',
907 'jfr;': '\U0001d527',
909 'Jopf;': '\U0001d541',
910 'jopf;': '\U0001d55b',
911 'Jscr;': '\U0001d4a5',
912 'jscr;': '\U0001d4bf',
924 'Kfr;': '\U0001d50e',
925 'kfr;': '\U0001d528',
931 'Kopf;': '\U0001d542',
932 'kopf;': '\U0001d55c',
933 'Kscr;': '\U0001d4a6',
934 'kscr;': '\U0001d4c0',
938 'laemptyv;': '\u29b4',
947 'Laplacetrf;': '\u2112',
954 'larrbfs;': '\u291f',
959 'larrsim;': '\u2973',
965 'lates;': '\u2aad\ufe00',
972 'lbrksld;': '\u298f',
973 'lbrkslu;': '\u298d',
985 'ldrdhar;': '\u2967',
986 'ldrushar;': '\u294b',
990 'LeftAngleBracket;': '\u27e8',
991 'LeftArrow;': '\u2190',
992 'Leftarrow;': '\u21d0',
993 'leftarrow;': '\u2190',
994 'LeftArrowBar;': '\u21e4',
995 'LeftArrowRightArrow;': '\u21c6',
996 'leftarrowtail;': '\u21a2',
997 'LeftCeiling;': '\u2308',
998 'LeftDoubleBracket;': '\u27e6',
999 'LeftDownTeeVector;': '\u2961',
1000 'LeftDownVector;': '\u21c3',
1001 'LeftDownVectorBar;': '\u2959',
1002 'LeftFloor;': '\u230a',
1003 'leftharpoondown;': '\u21bd',
1004 'leftharpoonup;': '\u21bc',
1005 'leftleftarrows;': '\u21c7',
1006 'LeftRightArrow;': '\u2194',
1007 'Leftrightarrow;': '\u21d4',
1008 'leftrightarrow;': '\u2194',
1009 'leftrightarrows;': '\u21c6',
1010 'leftrightharpoons;': '\u21cb',
1011 'leftrightsquigarrow;': '\u21ad',
1012 'LeftRightVector;': '\u294e',
1013 'LeftTee;': '\u22a3',
1014 'LeftTeeArrow;': '\u21a4',
1015 'LeftTeeVector;': '\u295a',
1016 'leftthreetimes;': '\u22cb',
1017 'LeftTriangle;': '\u22b2',
1018 'LeftTriangleBar;': '\u29cf',
1019 'LeftTriangleEqual;': '\u22b4',
1020 'LeftUpDownVector;': '\u2951',
1021 'LeftUpTeeVector;': '\u2960',
1022 'LeftUpVector;': '\u21bf',
1023 'LeftUpVectorBar;': '\u2958',
1024 'LeftVector;': '\u21bc',
1025 'LeftVectorBar;': '\u2952',
1030 'leqslant;': '\u2a7d',
1033 'lesdot;': '\u2a7f',
1034 'lesdoto;': '\u2a81',
1035 'lesdotor;': '\u2a83',
1036 'lesg;': '\u22da\ufe00',
1037 'lesges;': '\u2a93',
1038 'lessapprox;': '\u2a85',
1039 'lessdot;': '\u22d6',
1040 'lesseqgtr;': '\u22da',
1041 'lesseqqgtr;': '\u2a8b',
1042 'LessEqualGreater;': '\u22da',
1043 'LessFullEqual;': '\u2266',
1044 'LessGreater;': '\u2276',
1045 'lessgtr;': '\u2276',
1046 'LessLess;': '\u2aa1',
1047 'lesssim;': '\u2272',
1048 'LessSlantEqual;': '\u2a7d',
1049 'LessTilde;': '\u2272',
1050 'lfisht;': '\u297c',
1051 'lfloor;': '\u230a',
1052 'Lfr;': '\U0001d50f',
1053 'lfr;': '\U0001d529',
1059 'lharul;': '\u296a',
1066 'llcorner;': '\u231e',
1067 'Lleftarrow;': '\u21da',
1068 'llhard;': '\u296b',
1070 'Lmidot;': '\u013f',
1071 'lmidot;': '\u0140',
1072 'lmoust;': '\u23b0',
1073 'lmoustache;': '\u23b0',
1075 'lnapprox;': '\u2a89',
1084 'LongLeftArrow;': '\u27f5',
1085 'Longleftarrow;': '\u27f8',
1086 'longleftarrow;': '\u27f5',
1087 'LongLeftRightArrow;': '\u27f7',
1088 'Longleftrightarrow;': '\u27fa',
1089 'longleftrightarrow;': '\u27f7',
1090 'longmapsto;': '\u27fc',
1091 'LongRightArrow;': '\u27f6',
1092 'Longrightarrow;': '\u27f9',
1093 'longrightarrow;': '\u27f6',
1094 'looparrowleft;': '\u21ab',
1095 'looparrowright;': '\u21ac',
1097 'Lopf;': '\U0001d543',
1098 'lopf;': '\U0001d55d',
1099 'loplus;': '\u2a2d',
1100 'lotimes;': '\u2a34',
1101 'lowast;': '\u2217',
1103 'LowerLeftArrow;': '\u2199',
1104 'LowerRightArrow;': '\u2198',
1106 'lozenge;': '\u25ca',
1109 'lparlt;': '\u2993',
1111 'lrcorner;': '\u231f',
1113 'lrhard;': '\u296d',
1116 'lsaquo;': '\u2039',
1118 'lscr;': '\U0001d4c1',
1126 'lsquor;': '\u201a',
1127 'Lstrok;': '\u0141',
1128 'lstrok;': '\u0142',
1137 'lthree;': '\u22cb',
1138 'ltimes;': '\u22c9',
1139 'ltlarr;': '\u2976',
1140 'ltquest;': '\u2a7b',
1144 'ltrPar;': '\u2996',
1145 'lurdshar;': '\u294a',
1146 'luruhar;': '\u2966',
1147 'lvertneqq;': '\u2268\ufe00',
1148 'lvnE;': '\u2268\ufe00',
1153 'maltese;': '\u2720',
1156 'mapsto;': '\u21a6',
1157 'mapstodown;': '\u21a7',
1158 'mapstoleft;': '\u21a4',
1159 'mapstoup;': '\u21a5',
1160 'marker;': '\u25ae',
1161 'mcomma;': '\u2a29',
1166 'measuredangle;': '\u2221',
1167 'MediumSpace;': '\u205f',
1168 'Mellintrf;': '\u2133',
1169 'Mfr;': '\U0001d510',
1170 'mfr;': '\U0001d52a',
1176 'midcir;': '\u2af0',
1180 'minusb;': '\u229f',
1181 'minusd;': '\u2238',
1182 'minusdu;': '\u2a2a',
1183 'MinusPlus;': '\u2213',
1186 'mnplus;': '\u2213',
1187 'models;': '\u22a7',
1188 'Mopf;': '\U0001d544',
1189 'mopf;': '\U0001d55e',
1192 'mscr;': '\U0001d4c2',
1193 'mstpos;': '\u223e',
1196 'multimap;': '\u22b8',
1199 'Nacute;': '\u0143',
1200 'nacute;': '\u0144',
1201 'nang;': '\u2220\u20d2',
1203 'napE;': '\u2a70\u0338',
1204 'napid;': '\u224b\u0338',
1206 'napprox;': '\u2249',
1208 'natural;': '\u266e',
1209 'naturals;': '\u2115',
1212 'nbump;': '\u224e\u0338',
1213 'nbumpe;': '\u224f\u0338',
1215 'Ncaron;': '\u0147',
1216 'ncaron;': '\u0148',
1217 'Ncedil;': '\u0145',
1218 'ncedil;': '\u0146',
1220 'ncongdot;': '\u2a6d\u0338',
1226 'nearhk;': '\u2924',
1229 'nearrow;': '\u2197',
1230 'nedot;': '\u2250\u0338',
1231 'NegativeMediumSpace;': '\u200b',
1232 'NegativeThickSpace;': '\u200b',
1233 'NegativeThinSpace;': '\u200b',
1234 'NegativeVeryThinSpace;': '\u200b',
1235 'nequiv;': '\u2262',
1236 'nesear;': '\u2928',
1237 'nesim;': '\u2242\u0338',
1238 'NestedGreaterGreater;': '\u226b',
1239 'NestedLessLess;': '\u226a',
1241 'nexist;': '\u2204',
1242 'nexists;': '\u2204',
1243 'Nfr;': '\U0001d511',
1244 'nfr;': '\U0001d52b',
1245 'ngE;': '\u2267\u0338',
1248 'ngeqq;': '\u2267\u0338',
1249 'ngeqslant;': '\u2a7e\u0338',
1250 'nges;': '\u2a7e\u0338',
1251 'nGg;': '\u22d9\u0338',
1253 'nGt;': '\u226b\u20d2',
1256 'nGtv;': '\u226b\u0338',
1269 'nlE;': '\u2266\u0338',
1271 'nLeftarrow;': '\u21cd',
1272 'nleftarrow;': '\u219a',
1273 'nLeftrightarrow;': '\u21ce',
1274 'nleftrightarrow;': '\u21ae',
1276 'nleqq;': '\u2266\u0338',
1277 'nleqslant;': '\u2a7d\u0338',
1278 'nles;': '\u2a7d\u0338',
1280 'nLl;': '\u22d8\u0338',
1282 'nLt;': '\u226a\u20d2',
1285 'nltrie;': '\u22ec',
1286 'nLtv;': '\u226a\u0338',
1288 'NoBreak;': '\u2060',
1289 'NonBreakingSpace;': '\xa0',
1291 'nopf;': '\U0001d55f',
1295 'NotCongruent;': '\u2262',
1296 'NotCupCap;': '\u226d',
1297 'NotDoubleVerticalBar;': '\u2226',
1298 'NotElement;': '\u2209',
1299 'NotEqual;': '\u2260',
1300 'NotEqualTilde;': '\u2242\u0338',
1301 'NotExists;': '\u2204',
1302 'NotGreater;': '\u226f',
1303 'NotGreaterEqual;': '\u2271',
1304 'NotGreaterFullEqual;': '\u2267\u0338',
1305 'NotGreaterGreater;': '\u226b\u0338',
1306 'NotGreaterLess;': '\u2279',
1307 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1308 'NotGreaterTilde;': '\u2275',
1309 'NotHumpDownHump;': '\u224e\u0338',
1310 'NotHumpEqual;': '\u224f\u0338',
1312 'notindot;': '\u22f5\u0338',
1313 'notinE;': '\u22f9\u0338',
1314 'notinva;': '\u2209',
1315 'notinvb;': '\u22f7',
1316 'notinvc;': '\u22f6',
1317 'NotLeftTriangle;': '\u22ea',
1318 'NotLeftTriangleBar;': '\u29cf\u0338',
1319 'NotLeftTriangleEqual;': '\u22ec',
1320 'NotLess;': '\u226e',
1321 'NotLessEqual;': '\u2270',
1322 'NotLessGreater;': '\u2278',
1323 'NotLessLess;': '\u226a\u0338',
1324 'NotLessSlantEqual;': '\u2a7d\u0338',
1325 'NotLessTilde;': '\u2274',
1326 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1327 'NotNestedLessLess;': '\u2aa1\u0338',
1329 'notniva;': '\u220c',
1330 'notnivb;': '\u22fe',
1331 'notnivc;': '\u22fd',
1332 'NotPrecedes;': '\u2280',
1333 'NotPrecedesEqual;': '\u2aaf\u0338',
1334 'NotPrecedesSlantEqual;': '\u22e0',
1335 'NotReverseElement;': '\u220c',
1336 'NotRightTriangle;': '\u22eb',
1337 'NotRightTriangleBar;': '\u29d0\u0338',
1338 'NotRightTriangleEqual;': '\u22ed',
1339 'NotSquareSubset;': '\u228f\u0338',
1340 'NotSquareSubsetEqual;': '\u22e2',
1341 'NotSquareSuperset;': '\u2290\u0338',
1342 'NotSquareSupersetEqual;': '\u22e3',
1343 'NotSubset;': '\u2282\u20d2',
1344 'NotSubsetEqual;': '\u2288',
1345 'NotSucceeds;': '\u2281',
1346 'NotSucceedsEqual;': '\u2ab0\u0338',
1347 'NotSucceedsSlantEqual;': '\u22e1',
1348 'NotSucceedsTilde;': '\u227f\u0338',
1349 'NotSuperset;': '\u2283\u20d2',
1350 'NotSupersetEqual;': '\u2289',
1351 'NotTilde;': '\u2241',
1352 'NotTildeEqual;': '\u2244',
1353 'NotTildeFullEqual;': '\u2247',
1354 'NotTildeTilde;': '\u2249',
1355 'NotVerticalBar;': '\u2224',
1357 'nparallel;': '\u2226',
1358 'nparsl;': '\u2afd\u20e5',
1359 'npart;': '\u2202\u0338',
1360 'npolint;': '\u2a14',
1362 'nprcue;': '\u22e0',
1363 'npre;': '\u2aaf\u0338',
1365 'npreceq;': '\u2aaf\u0338',
1368 'nrarrc;': '\u2933\u0338',
1369 'nrarrw;': '\u219d\u0338',
1370 'nRightarrow;': '\u21cf',
1371 'nrightarrow;': '\u219b',
1373 'nrtrie;': '\u22ed',
1375 'nsccue;': '\u22e1',
1376 'nsce;': '\u2ab0\u0338',
1377 'Nscr;': '\U0001d4a9',
1378 'nscr;': '\U0001d4c3',
1379 'nshortmid;': '\u2224',
1380 'nshortparallel;': '\u2226',
1383 'nsimeq;': '\u2244',
1386 'nsqsube;': '\u22e2',
1387 'nsqsupe;': '\u22e3',
1389 'nsubE;': '\u2ac5\u0338',
1391 'nsubset;': '\u2282\u20d2',
1392 'nsubseteq;': '\u2288',
1393 'nsubseteqq;': '\u2ac5\u0338',
1395 'nsucceq;': '\u2ab0\u0338',
1397 'nsupE;': '\u2ac6\u0338',
1399 'nsupset;': '\u2283\u20d2',
1400 'nsupseteq;': '\u2289',
1401 'nsupseteqq;': '\u2ac6\u0338',
1408 'ntriangleleft;': '\u22ea',
1409 'ntrianglelefteq;': '\u22ec',
1410 'ntriangleright;': '\u22eb',
1411 'ntrianglerighteq;': '\u22ed',
1415 'numero;': '\u2116',
1417 'nvap;': '\u224d\u20d2',
1418 'nVDash;': '\u22af',
1419 'nVdash;': '\u22ae',
1420 'nvDash;': '\u22ad',
1421 'nvdash;': '\u22ac',
1422 'nvge;': '\u2265\u20d2',
1424 'nvHarr;': '\u2904',
1425 'nvinfin;': '\u29de',
1426 'nvlArr;': '\u2902',
1427 'nvle;': '\u2264\u20d2',
1429 'nvltrie;': '\u22b4\u20d2',
1430 'nvrArr;': '\u2903',
1431 'nvrtrie;': '\u22b5\u20d2',
1432 'nvsim;': '\u223c\u20d2',
1433 'nwarhk;': '\u2923',
1436 'nwarrow;': '\u2196',
1437 'nwnear;': '\u2927',
1451 'Odblac;': '\u0150',
1452 'odblac;': '\u0151',
1455 'odsold;': '\u29bc',
1459 'Ofr;': '\U0001d512',
1460 'ofr;': '\U0001d52c',
1472 'olcross;': '\u29bb',
1479 'Omicron;': '\u039f',
1480 'omicron;': '\u03bf',
1482 'ominus;': '\u2296',
1483 'Oopf;': '\U0001d546',
1484 'oopf;': '\U0001d560',
1486 'OpenCurlyDoubleQuote;': '\u201c',
1487 'OpenCurlyQuote;': '\u2018',
1495 'orderof;': '\u2134',
1500 'origof;': '\u22b6',
1502 'orslope;': '\u2a57',
1505 'Oscr;': '\U0001d4aa',
1516 'Otimes;': '\u2a37',
1517 'otimes;': '\u2297',
1518 'otimesas;': '\u2a36',
1524 'OverBar;': '\u203e',
1525 'OverBrace;': '\u23de',
1526 'OverBracket;': '\u23b4',
1527 'OverParenthesis;': '\u23dc',
1531 'parallel;': '\u2225',
1532 'parsim;': '\u2af3',
1535 'PartialD;': '\u2202',
1540 'permil;': '\u2030',
1542 'pertenk;': '\u2031',
1543 'Pfr;': '\U0001d513',
1544 'pfr;': '\U0001d52d',
1548 'phmmat;': '\u2133',
1552 'pitchfork;': '\u22d4',
1554 'planck;': '\u210f',
1555 'planckh;': '\u210e',
1556 'plankv;': '\u210f',
1558 'plusacir;': '\u2a23',
1560 'pluscir;': '\u2a22',
1561 'plusdo;': '\u2214',
1562 'plusdu;': '\u2a25',
1564 'PlusMinus;': '\xb1',
1567 'plussim;': '\u2a26',
1568 'plustwo;': '\u2a27',
1570 'Poincareplane;': '\u210c',
1571 'pointint;': '\u2a15',
1573 'popf;': '\U0001d561',
1583 'precapprox;': '\u2ab7',
1584 'preccurlyeq;': '\u227c',
1585 'Precedes;': '\u227a',
1586 'PrecedesEqual;': '\u2aaf',
1587 'PrecedesSlantEqual;': '\u227c',
1588 'PrecedesTilde;': '\u227e',
1589 'preceq;': '\u2aaf',
1590 'precnapprox;': '\u2ab9',
1591 'precneqq;': '\u2ab5',
1592 'precnsim;': '\u22e8',
1593 'precsim;': '\u227e',
1596 'primes;': '\u2119',
1599 'prnsim;': '\u22e8',
1601 'Product;': '\u220f',
1602 'profalar;': '\u232e',
1603 'profline;': '\u2312',
1604 'profsurf;': '\u2313',
1606 'Proportion;': '\u2237',
1607 'Proportional;': '\u221d',
1608 'propto;': '\u221d',
1610 'prurel;': '\u22b0',
1611 'Pscr;': '\U0001d4ab',
1612 'pscr;': '\U0001d4c5',
1615 'puncsp;': '\u2008',
1616 'Qfr;': '\U0001d514',
1617 'qfr;': '\U0001d52e',
1620 'qopf;': '\U0001d562',
1621 'qprime;': '\u2057',
1622 'Qscr;': '\U0001d4ac',
1623 'qscr;': '\U0001d4c6',
1624 'quaternions;': '\u210d',
1625 'quatint;': '\u2a16',
1627 'questeq;': '\u225f',
1633 'race;': '\u223d\u0331',
1634 'Racute;': '\u0154',
1635 'racute;': '\u0155',
1637 'raemptyv;': '\u29b3',
1642 'rangle;': '\u27e9',
1648 'rarrap;': '\u2975',
1650 'rarrbfs;': '\u2920',
1652 'rarrfs;': '\u291e',
1653 'rarrhk;': '\u21aa',
1654 'rarrlp;': '\u21ac',
1655 'rarrpl;': '\u2945',
1656 'rarrsim;': '\u2974',
1657 'Rarrtl;': '\u2916',
1658 'rarrtl;': '\u21a3',
1660 'rAtail;': '\u291c',
1661 'ratail;': '\u291a',
1663 'rationals;': '\u211a',
1671 'rbrksld;': '\u298e',
1672 'rbrkslu;': '\u2990',
1673 'Rcaron;': '\u0158',
1674 'rcaron;': '\u0159',
1675 'Rcedil;': '\u0156',
1676 'rcedil;': '\u0157',
1682 'rdldhar;': '\u2969',
1684 'rdquor;': '\u201d',
1688 'realine;': '\u211b',
1689 'realpart;': '\u211c',
1696 'ReverseElement;': '\u220b',
1697 'ReverseEquilibrium;': '\u21cb',
1698 'ReverseUpEquilibrium;': '\u296f',
1699 'rfisht;': '\u297d',
1700 'rfloor;': '\u230b',
1702 'rfr;': '\U0001d52f',
1706 'rharul;': '\u296c',
1710 'RightAngleBracket;': '\u27e9',
1711 'RightArrow;': '\u2192',
1712 'Rightarrow;': '\u21d2',
1713 'rightarrow;': '\u2192',
1714 'RightArrowBar;': '\u21e5',
1715 'RightArrowLeftArrow;': '\u21c4',
1716 'rightarrowtail;': '\u21a3',
1717 'RightCeiling;': '\u2309',
1718 'RightDoubleBracket;': '\u27e7',
1719 'RightDownTeeVector;': '\u295d',
1720 'RightDownVector;': '\u21c2',
1721 'RightDownVectorBar;': '\u2955',
1722 'RightFloor;': '\u230b',
1723 'rightharpoondown;': '\u21c1',
1724 'rightharpoonup;': '\u21c0',
1725 'rightleftarrows;': '\u21c4',
1726 'rightleftharpoons;': '\u21cc',
1727 'rightrightarrows;': '\u21c9',
1728 'rightsquigarrow;': '\u219d',
1729 'RightTee;': '\u22a2',
1730 'RightTeeArrow;': '\u21a6',
1731 'RightTeeVector;': '\u295b',
1732 'rightthreetimes;': '\u22cc',
1733 'RightTriangle;': '\u22b3',
1734 'RightTriangleBar;': '\u29d0',
1735 'RightTriangleEqual;': '\u22b5',
1736 'RightUpDownVector;': '\u294f',
1737 'RightUpTeeVector;': '\u295c',
1738 'RightUpVector;': '\u21be',
1739 'RightUpVectorBar;': '\u2954',
1740 'RightVector;': '\u21c0',
1741 'RightVectorBar;': '\u2953',
1743 'risingdotseq;': '\u2253',
1747 'rmoust;': '\u23b1',
1748 'rmoustache;': '\u23b1',
1755 'ropf;': '\U0001d563',
1756 'roplus;': '\u2a2e',
1757 'rotimes;': '\u2a35',
1758 'RoundImplies;': '\u2970',
1760 'rpargt;': '\u2994',
1761 'rppolint;': '\u2a12',
1763 'Rrightarrow;': '\u21db',
1764 'rsaquo;': '\u203a',
1766 'rscr;': '\U0001d4c7',
1771 'rsquor;': '\u2019',
1772 'rthree;': '\u22cc',
1773 'rtimes;': '\u22ca',
1777 'rtriltri;': '\u29ce',
1778 'RuleDelayed;': '\u29f4',
1779 'ruluhar;': '\u2968',
1781 'Sacute;': '\u015a',
1782 'sacute;': '\u015b',
1787 'Scaron;': '\u0160',
1788 'scaron;': '\u0161',
1792 'Scedil;': '\u015e',
1793 'scedil;': '\u015f',
1798 'scnsim;': '\u22e9',
1799 'scpolint;': '\u2a13',
1806 'searhk;': '\u2925',
1809 'searrow;': '\u2198',
1813 'seswar;': '\u2929',
1814 'setminus;': '\u2216',
1817 'Sfr;': '\U0001d516',
1818 'sfr;': '\U0001d530',
1819 'sfrown;': '\u2322',
1821 'SHCHcy;': '\u0429',
1822 'shchcy;': '\u0449',
1825 'ShortDownArrow;': '\u2193',
1826 'ShortLeftArrow;': '\u2190',
1827 'shortmid;': '\u2223',
1828 'shortparallel;': '\u2225',
1829 'ShortRightArrow;': '\u2192',
1830 'ShortUpArrow;': '\u2191',
1835 'sigmaf;': '\u03c2',
1836 'sigmav;': '\u03c2',
1838 'simdot;': '\u2a6a',
1846 'simplus;': '\u2a24',
1847 'simrarr;': '\u2972',
1849 'SmallCircle;': '\u2218',
1850 'smallsetminus;': '\u2216',
1851 'smashp;': '\u2a33',
1852 'smeparsl;': '\u29e4',
1857 'smtes;': '\u2aac\ufe00',
1858 'SOFTcy;': '\u042c',
1859 'softcy;': '\u044c',
1862 'solbar;': '\u233f',
1863 'Sopf;': '\U0001d54a',
1864 'sopf;': '\U0001d564',
1865 'spades;': '\u2660',
1866 'spadesuit;': '\u2660',
1869 'sqcaps;': '\u2293\ufe00',
1871 'sqcups;': '\u2294\ufe00',
1874 'sqsube;': '\u2291',
1875 'sqsubset;': '\u228f',
1876 'sqsubseteq;': '\u2291',
1878 'sqsupe;': '\u2292',
1879 'sqsupset;': '\u2290',
1880 'sqsupseteq;': '\u2292',
1882 'Square;': '\u25a1',
1883 'square;': '\u25a1',
1884 'SquareIntersection;': '\u2293',
1885 'SquareSubset;': '\u228f',
1886 'SquareSubsetEqual;': '\u2291',
1887 'SquareSuperset;': '\u2290',
1888 'SquareSupersetEqual;': '\u2292',
1889 'SquareUnion;': '\u2294',
1890 'squarf;': '\u25aa',
1893 'Sscr;': '\U0001d4ae',
1894 'sscr;': '\U0001d4c8',
1895 'ssetmn;': '\u2216',
1896 'ssmile;': '\u2323',
1897 'sstarf;': '\u22c6',
1901 'straightepsilon;': '\u03f5',
1902 'straightphi;': '\u03d5',
1906 'subdot;': '\u2abd',
1909 'subedot;': '\u2ac3',
1910 'submult;': '\u2ac1',
1913 'subplus;': '\u2abf',
1914 'subrarr;': '\u2979',
1915 'Subset;': '\u22d0',
1916 'subset;': '\u2282',
1917 'subseteq;': '\u2286',
1918 'subseteqq;': '\u2ac5',
1919 'SubsetEqual;': '\u2286',
1920 'subsetneq;': '\u228a',
1921 'subsetneqq;': '\u2acb',
1922 'subsim;': '\u2ac7',
1923 'subsub;': '\u2ad5',
1924 'subsup;': '\u2ad3',
1926 'succapprox;': '\u2ab8',
1927 'succcurlyeq;': '\u227d',
1928 'Succeeds;': '\u227b',
1929 'SucceedsEqual;': '\u2ab0',
1930 'SucceedsSlantEqual;': '\u227d',
1931 'SucceedsTilde;': '\u227f',
1932 'succeq;': '\u2ab0',
1933 'succnapprox;': '\u2aba',
1934 'succneqq;': '\u2ab6',
1935 'succnsim;': '\u22e9',
1936 'succsim;': '\u227f',
1937 'SuchThat;': '\u220b',
1949 'supdot;': '\u2abe',
1950 'supdsub;': '\u2ad8',
1953 'supedot;': '\u2ac4',
1954 'Superset;': '\u2283',
1955 'SupersetEqual;': '\u2287',
1956 'suphsol;': '\u27c9',
1957 'suphsub;': '\u2ad7',
1958 'suplarr;': '\u297b',
1959 'supmult;': '\u2ac2',
1962 'supplus;': '\u2ac0',
1963 'Supset;': '\u22d1',
1964 'supset;': '\u2283',
1965 'supseteq;': '\u2287',
1966 'supseteqq;': '\u2ac6',
1967 'supsetneq;': '\u228b',
1968 'supsetneqq;': '\u2acc',
1969 'supsim;': '\u2ac8',
1970 'supsub;': '\u2ad4',
1971 'supsup;': '\u2ad6',
1972 'swarhk;': '\u2926',
1975 'swarrow;': '\u2199',
1976 'swnwar;': '\u292a',
1980 'target;': '\u2316',
1984 'Tcaron;': '\u0164',
1985 'tcaron;': '\u0165',
1986 'Tcedil;': '\u0162',
1987 'tcedil;': '\u0163',
1991 'telrec;': '\u2315',
1992 'Tfr;': '\U0001d517',
1993 'tfr;': '\U0001d531',
1994 'there4;': '\u2234',
1995 'Therefore;': '\u2234',
1996 'therefore;': '\u2234',
1999 'thetasym;': '\u03d1',
2000 'thetav;': '\u03d1',
2001 'thickapprox;': '\u2248',
2002 'thicksim;': '\u223c',
2003 'ThickSpace;': '\u205f\u200a',
2004 'thinsp;': '\u2009',
2005 'ThinSpace;': '\u2009',
2007 'thksim;': '\u223c',
2014 'TildeEqual;': '\u2243',
2015 'TildeFullEqual;': '\u2245',
2016 'TildeTilde;': '\u2248',
2019 'timesb;': '\u22a0',
2020 'timesbar;': '\u2a31',
2021 'timesd;': '\u2a30',
2025 'topbot;': '\u2336',
2026 'topcir;': '\u2af1',
2027 'Topf;': '\U0001d54b',
2028 'topf;': '\U0001d565',
2029 'topfork;': '\u2ada',
2031 'tprime;': '\u2034',
2034 'triangle;': '\u25b5',
2035 'triangledown;': '\u25bf',
2036 'triangleleft;': '\u25c3',
2037 'trianglelefteq;': '\u22b4',
2038 'triangleq;': '\u225c',
2039 'triangleright;': '\u25b9',
2040 'trianglerighteq;': '\u22b5',
2041 'tridot;': '\u25ec',
2043 'triminus;': '\u2a3a',
2044 'TripleDot;': '\u20db',
2045 'triplus;': '\u2a39',
2047 'tritime;': '\u2a3b',
2048 'trpezium;': '\u23e2',
2049 'Tscr;': '\U0001d4af',
2050 'tscr;': '\U0001d4c9',
2055 'Tstrok;': '\u0166',
2056 'tstrok;': '\u0167',
2058 'twoheadleftarrow;': '\u219e',
2059 'twoheadrightarrow;': '\u21a0',
2067 'Uarrocir;': '\u2949',
2070 'Ubreve;': '\u016c',
2071 'ubreve;': '\u016d',
2079 'Udblac;': '\u0170',
2080 'udblac;': '\u0171',
2082 'ufisht;': '\u297e',
2083 'Ufr;': '\U0001d518',
2084 'ufr;': '\U0001d532',
2093 'ulcorn;': '\u231c',
2094 'ulcorner;': '\u231c',
2095 'ulcrop;': '\u230f',
2102 'UnderBrace;': '\u23df',
2103 'UnderBracket;': '\u23b5',
2104 'UnderParenthesis;': '\u23dd',
2106 'UnionPlus;': '\u228e',
2109 'Uopf;': '\U0001d54c',
2110 'uopf;': '\U0001d566',
2111 'UpArrow;': '\u2191',
2112 'Uparrow;': '\u21d1',
2113 'uparrow;': '\u2191',
2114 'UpArrowBar;': '\u2912',
2115 'UpArrowDownArrow;': '\u21c5',
2116 'UpDownArrow;': '\u2195',
2117 'Updownarrow;': '\u21d5',
2118 'updownarrow;': '\u2195',
2119 'UpEquilibrium;': '\u296e',
2120 'upharpoonleft;': '\u21bf',
2121 'upharpoonright;': '\u21be',
2123 'UpperLeftArrow;': '\u2196',
2124 'UpperRightArrow;': '\u2197',
2128 'Upsilon;': '\u03a5',
2129 'upsilon;': '\u03c5',
2131 'UpTeeArrow;': '\u21a5',
2132 'upuparrows;': '\u21c8',
2133 'urcorn;': '\u231d',
2134 'urcorner;': '\u231d',
2135 'urcrop;': '\u230e',
2139 'Uscr;': '\U0001d4b0',
2140 'uscr;': '\U0001d4ca',
2142 'Utilde;': '\u0168',
2143 'utilde;': '\u0169',
2151 'uwangle;': '\u29a7',
2152 'vangrt;': '\u299c',
2153 'varepsilon;': '\u03f5',
2154 'varkappa;': '\u03f0',
2155 'varnothing;': '\u2205',
2156 'varphi;': '\u03d5',
2158 'varpropto;': '\u221d',
2161 'varrho;': '\u03f1',
2162 'varsigma;': '\u03c2',
2163 'varsubsetneq;': '\u228a\ufe00',
2164 'varsubsetneqq;': '\u2acb\ufe00',
2165 'varsupsetneq;': '\u228b\ufe00',
2166 'varsupsetneqq;': '\u2acc\ufe00',
2167 'vartheta;': '\u03d1',
2168 'vartriangleleft;': '\u22b2',
2169 'vartriangleright;': '\u22b3',
2179 'Vdashl;': '\u2ae6',
2182 'veebar;': '\u22bb',
2184 'vellip;': '\u22ee',
2185 'Verbar;': '\u2016',
2189 'VerticalBar;': '\u2223',
2190 'VerticalLine;': '|',
2191 'VerticalSeparator;': '\u2758',
2192 'VerticalTilde;': '\u2240',
2193 'VeryThinSpace;': '\u200a',
2194 'Vfr;': '\U0001d519',
2195 'vfr;': '\U0001d533',
2197 'vnsub;': '\u2282\u20d2',
2198 'vnsup;': '\u2283\u20d2',
2199 'Vopf;': '\U0001d54d',
2200 'vopf;': '\U0001d567',
2203 'Vscr;': '\U0001d4b1',
2204 'vscr;': '\U0001d4cb',
2205 'vsubnE;': '\u2acb\ufe00',
2206 'vsubne;': '\u228a\ufe00',
2207 'vsupnE;': '\u2acc\ufe00',
2208 'vsupne;': '\u228b\ufe00',
2209 'Vvdash;': '\u22aa',
2210 'vzigzag;': '\u299a',
2213 'wedbar;': '\u2a5f',
2216 'wedgeq;': '\u2259',
2217 'weierp;': '\u2118',
2218 'Wfr;': '\U0001d51a',
2219 'wfr;': '\U0001d534',
2220 'Wopf;': '\U0001d54e',
2221 'wopf;': '\U0001d568',
2224 'wreath;': '\u2240',
2225 'Wscr;': '\U0001d4b2',
2226 'wscr;': '\U0001d4cc',
2231 'Xfr;': '\U0001d51b',
2232 'xfr;': '\U0001d535',
2242 'Xopf;': '\U0001d54f',
2243 'xopf;': '\U0001d569',
2244 'xoplus;': '\u2a01',
2245 'xotime;': '\u2a02',
2248 'Xscr;': '\U0001d4b3',
2249 'xscr;': '\U0001d4cd',
2250 'xsqcup;': '\u2a06',
2251 'xuplus;': '\u2a04',
2254 'xwedge;': '\u22c0',
2267 'Yfr;': '\U0001d51c',
2268 'yfr;': '\U0001d536',
2271 'Yopf;': '\U0001d550',
2272 'yopf;': '\U0001d56a',
2273 'Yscr;': '\U0001d4b4',
2274 'yscr;': '\U0001d4ce',
2280 'Zacute;': '\u0179',
2281 'zacute;': '\u017a',
2282 'Zcaron;': '\u017d',
2283 'zcaron;': '\u017e',
2288 'zeetrf;': '\u2128',
2289 'ZeroWidthSpace;': '\u200b',
2293 'zfr;': '\U0001d537',
2296 'zigrarr;': '\u21dd',
2298 'zopf;': '\U0001d56b',
2299 'Zscr;': '\U0001d4b5',
2300 'zscr;': '\U0001d4cf',
2306 import http
.client
as compat_http_client
2307 except ImportError: # Python 2
2308 import httplib
as compat_http_client
2311 from urllib
.error
import HTTPError
as compat_HTTPError
2312 except ImportError: # Python 2
2313 from urllib2
import HTTPError
as compat_HTTPError
2316 from urllib
.request
import urlretrieve
as compat_urlretrieve
2317 except ImportError: # Python 2
2318 from urllib
import urlretrieve
as compat_urlretrieve
2321 from html
.parser
import HTMLParser
as compat_HTMLParser
2322 except ImportError: # Python 2
2323 from HTMLParser
import HTMLParser
as compat_HTMLParser
2326 from subprocess
import DEVNULL
2327 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2329 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2332 import http
.server
as compat_http_server
2334 import BaseHTTPServer
as compat_http_server
2337 compat_str
= unicode # Python 2
2342 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2343 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2344 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2345 except ImportError: # Python 2
2346 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2347 else re
.compile(r
'([\x00-\x7f]+)'))
2349 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2350 # implementations from cpython 3.4.3's stdlib. Python 2's version
2351 # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
2353 def compat_urllib_parse_unquote_to_bytes(string
):
2354 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2355 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2356 # unescaped non-ASCII characters, which URIs should not.
2358 # Is it a string-like object?
2361 if isinstance(string
, compat_str
):
2362 string
= string
.encode('utf-8')
2363 bits
= string
.split(b
'%')
2368 for item
in bits
[1:]:
2370 append(compat_urllib_parse
._hextochr
[item
[:2]])
2375 return b
''.join(res
)
2377 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2378 """Replace %xx escapes by their single-character equivalent. The optional
2379 encoding and errors parameters specify how to decode percent-encoded
2380 sequences into Unicode characters, as accepted by the bytes.decode()
2382 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2383 sequences are replaced by a placeholder character.
2385 unquote('abc%20def') -> 'abc def'.
2387 if '%' not in string
:
2390 if encoding
is None:
2394 bits
= _asciire
.split(string
)
2397 for i
in range(1, len(bits
), 2):
2398 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Decode a form-encoded value: '+' becomes a space, then %xx escapes are unquoted.

    *encoding* and *errors* are handed on unchanged to
    compat_urllib_parse_unquote().

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Plus signs must be translated before percent-decoding, otherwise an
    # escaped '%2B' would be turned into a space as well.
    return compat_urllib_parse_unquote(string.replace('+', ' '), encoding, errors)
2412 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2413 except ImportError: # Python 2
2414 # Python 2 will choke in urlencode on mixture of byte and unicode strings.
2415 # Possible solutions are to either port it from python 3 with all
2416 # the friends or manually ensure input query contains only byte strings.
2417 # We will stick with latter thus recursively encoding the whole query.
2418 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2420 if isinstance(e
, dict):
2422 elif isinstance(e
, (list, tuple,)):
2423 list_e
= encode_list(e
)
2424 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2425 elif isinstance(e
, compat_str
):
2426 e
= e
.encode(encoding
)
2430 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2433 return [encode_elem(e
) for e
in l
]
2435 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2438 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2439 except ImportError: # Python < 3.4
2440 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
    """urllib handler that serves ``data:`` URLs (RFC 2397) from memory."""

    def data_open(self, req):
        """Decode the data URL carried by *req* and return it as a response.

        Any POSTed data on the request is ignored. The returned object is a
        compat_urllib_response.addinfourl wrapping the decoded payload, with
        Content-type and Content-length headers.

        Grammar handled here:
          dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
          mediatype := [ type "/" subtype ] *( ";" parameter )
          parameter := attribute "=" value
        """
        full_url = req.get_full_url()

        # Drop the scheme, then separate the media type from the payload.
        _scheme, remainder = full_url.split(':', 1)
        mediatype, payload = remainder.split(',', 1)

        # Base64 payloads may be percent-quoted too, so always unquote first.
        payload = compat_urllib_parse_unquote_to_bytes(payload)
        base64_marker = ';base64'
        if mediatype.endswith(base64_marker):
            payload = binascii.a2b_base64(payload)
            mediatype = mediatype[:-len(base64_marker)]

        # An omitted media type falls back to this default.
        mediatype = mediatype or 'text/plain;charset=US-ASCII'

        header_text = 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(payload))
        headers = email.message_from_string(header_text)

        return compat_urllib_response.addinfourl(io.BytesIO(payload), headers, full_url)
2472 compat_basestring
= basestring
# Python 2
2474 compat_basestring
= str
2477 compat_chr
= unichr # Python 2
2482 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2483 except ImportError: # Python 2.6
2484 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2487 etree
= xml
.etree
.ElementTree
2490 class _TreeBuilder(etree
.TreeBuilder
):
2491 def doctype(self
, name
, pubid
, system
):
2495 if sys
.version_info
[0] >= 3:
def compat_etree_fromstring(text):
    """Parse the XML document in *text* and return its root element.

    The parser is driven by a _TreeBuilder instance instead of the default
    tree builder.
    """
    builder = _TreeBuilder()
    xml_parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=xml_parser)
2499 # python 2.x tries to encode unicode strings with ascii (see the
2500 # XMLParser._fixtext method)
2502 _etree_iter
= etree
.Element
.iter
2503 except AttributeError: # Python <=2.6
2504 def _etree_iter(root
):
2505 for el
in root
.findall('*'):
2507 for sub
in _etree_iter(el
):
2510 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2512 def _XML(text
, parser
=None):
2514 parser
= etree
.XMLParser(target
=_TreeBuilder())
2516 return parser
.close()
2518 def _element_factory(*args
, **kwargs
):
2519 el
= etree
.Element(*args
, **kwargs
)
2520 for k
, v
in el
.items():
2521 if isinstance(v
, bytes):
2522 el
.set(k
, v
.decode('utf-8'))
2525 def compat_etree_fromstring(text
):
2526 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2527 for el
in _etree_iter(doc
):
2528 if el
.text
is not None and isinstance(el
.text
, bytes):
2529 el
.text
= el
.text
.decode('utf-8')
2532 if hasattr(etree
, 'register_namespace'):
2533 compat_etree_register_namespace
= etree
.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Associate *prefix* with the namespace *uri* in etree's global registry.

    Any existing entry that uses either the same prefix or the same URI is
    dropped first, so both stay unique in the registry.

    Raises ValueError if *prefix* has the ``ns<digits>`` form, which is
    reserved for internal use.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    namespace_map = etree._namespace_map
    # Collect the conflicting keys before deleting, so the mapping is never
    # mutated while it is being iterated.
    stale_uris = [known_uri for known_uri, known_prefix in namespace_map.items()
                  if known_uri == uri or known_prefix == prefix]
    for known_uri in stale_uris:
        del namespace_map[known_uri]
    namespace_map[uri] = prefix
2550 if sys
.version_info
< (2, 7):
2551 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2552 # .//node does not match if a node is a direct child of . !
def compat_xpath(xpath):
    """Return *xpath* as an ASCII byte string when it is a compat_str.

    Other values are returned unchanged.
    """
    return xpath.encode('ascii') if isinstance(xpath, compat_str) else xpath
2558 compat_xpath
= lambda xpath
: xpath
2561 from urllib
.parse
import parse_qs
as compat_parse_qs
2562 except ImportError: # Python 2
2563 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2564 # Python 2's version is apparently totally broken
2566 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2567 encoding
='utf-8', errors
='replace'):
2568 qs
, _coerce_result
= qs
, compat_str
2569 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2571 for name_value
in pairs
:
2572 if not name_value
and not strict_parsing
:
2574 nv
= name_value
.split('=', 1)
2577 raise ValueError('bad query field: %r' % (name_value
,))
2578 # Handle case of a control-name with no equal sign
2579 if keep_blank_values
:
2583 if len(nv
[1]) or keep_blank_values
:
2584 name
= nv
[0].replace('+', ' ')
2585 name
= compat_urllib_parse_unquote(
2586 name
, encoding
=encoding
, errors
=errors
)
2587 name
= _coerce_result(name
)
2588 value
= nv
[1].replace('+', ' ')
2589 value
= compat_urllib_parse_unquote(
2590 value
, encoding
=encoding
, errors
=errors
)
2591 value
= _coerce_result(value
)
2592 r
.append((name
, value
))
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse the query string *qs* into a dict mapping names to value lists.

    The pairs come from _parse_qsl(), which receives every option unchanged.
    Values of a repeated name are collected in order of appearance.
    """
    grouped = {}
    for key, val in _parse_qsl(qs, keep_blank_values, strict_parsing,
                               encoding=encoding, errors=errors):
        grouped.setdefault(key, []).append(val)
    return grouped
2608 from shlex
import quote
as compat_shlex_quote
2609 except ImportError: # Python < 3.3
def compat_shlex_quote(s):
    """Return *s* escaped so that a POSIX shell reads it as a single word.

    Strings made up only of word characters, '-', '_', '.' and '/' are
    returned unchanged. Anything else (including the empty string) is wrapped
    in single quotes, with embedded single quotes written as '"'"'.
    """
    # Anchor with \Z rather than $: '$' also matches just before a trailing
    # newline, which would let 'abc\n' through unquoted and hand the shell a
    # raw command separator.
    if re.match(r'^[-_\w./]+\Z', s):
        return s
    return "'" + s.replace("'", "'\"'\"'") + "'"
2618 args
= shlex
.split('äøę')
2619 assert (isinstance(args
, list) and
2620 isinstance(args
[0], compat_str
) and
2621 args
[0] == 'äøę')
2622 compat_shlex_split
= shlex
.split
2623 except (AssertionError, UnicodeEncodeError):
2624 # Working around shlex issue with unicode strings on some python 2
2625 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split() and return the tokens as text.

    A compat_str input is encoded to UTF-8 before it is handed to
    shlex.split(); each resulting token is decoded from UTF-8 again.
    *comments* and *posix* are forwarded unchanged.
    """
    if isinstance(s, compat_str):
        s = s.encode('utf-8')
    return [token.decode('utf-8') for token in shlex.split(s, comments, posix)]
2639 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2642 if sys
.version_info
>= (3, 0):
2643 compat_getenv
= os
.getenv
2644 compat_expanduser
= os
.path
.expanduser
2646 def compat_setenv(key
, value
, env
=os
.environ
):
2649 # Environment variables should be decoded with filesystem encoding.
2650 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2652 def compat_getenv(key
, default
=None):
2653 from .utils
import get_filesystem_encoding
2654 env
= os
.getenv(key
, default
)
2656 env
= env
.decode(get_filesystem_encoding())
2659 def compat_setenv(key
, value
, env
=os
.environ
):
2661 from .utils
import get_filesystem_encoding
2662 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2663 env
[encode(key
)] = encode(value
)
2665 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2666 # environment variables with filesystem encoding. We will work around this by
2667 # providing adjusted implementations.
2668 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2669 # for different platforms with correct environment variables decoding.
2671 if compat_os_name
== 'posix':
2672 def compat_expanduser(path
):
2673 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2675 if not path
.startswith('~'):
2677 i
= path
.find('/', 1)
2681 if 'HOME' not in os
.environ
:
2683 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2685 userhome
= compat_getenv('HOME')
2689 pwent
= pwd
.getpwnam(path
[1:i
])
2692 userhome
= pwent
.pw_dir
2693 userhome
= userhome
.rstrip('/')
2694 return (userhome
+ path
[i
:]) or '/'
2695 elif compat_os_name
== 'nt' or compat_os_name
== 'ce':
2696 def compat_expanduser(path
):
2697 """Expand ~ and ~user constructs.
2699 If user or $HOME is unknown, do nothing."""
2703 while i
< n
and path
[i
] not in '/\\':
2706 if 'HOME' in os
.environ
:
2707 userhome
= compat_getenv('HOME')
2708 elif 'USERPROFILE' in os
.environ
:
2709 userhome
= compat_getenv('USERPROFILE')
2710 elif 'HOMEPATH' not in os
.environ
:
2714 drive
= compat_getenv('HOMEDRIVE')
2717 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2720 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2722 return userhome
+ path
[i
:]
2724 compat_expanduser
= os
.path
.expanduser
2727 if sys
.version_info
< (3, 0):
def compat_print(s):
    """Print *s* encoded in the preferred encoding.

    Characters the encoding cannot represent are written as XML character
    references instead of raising.
    """
    from .utils import preferredencoding
    encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
    print(encoded)
2732 def compat_print(s
):
2733 assert isinstance(s
, compat_str
)
2737 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass(), encoding a compat_str *prompt* first.

    A compat_str prompt is encoded with the preferred encoding; any other
    prompt is passed through as is. Remaining arguments are forwarded
    unchanged.
    """
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    from .utils import preferredencoding
    return getpass.getpass(prompt.encode(preferredencoding()), *args, **kwargs)
2744 compat_getpass
= getpass
.getpass
2747 compat_input
= raw_input
2748 except NameError: # Python 3
2749 compat_input
= input
2751 # Python < 2.6.5 require kwargs to be bytes
2755 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a copy of *kwargs* in which every key has been passed through bytes().

    Values are carried over unchanged.
    """
    converted = {}
    for name, value in kwargs.items():
        converted[bytes(name)] = value
    return converted
2760 compat_kwargs
= lambda kwargs
: kwargs
2764 compat_numeric_types
= (int, float, long, complex)
2765 except NameError: # Python 3
2766 compat_numeric_types
= (int, float, complex)
2769 if sys
.version_info
< (2, 7):
2770 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2771 host
, port
= address
2773 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2774 af
, socktype
, proto
, canonname
, sa
= res
2777 sock
= socket
.socket(af
, socktype
, proto
)
2778 sock
.settimeout(timeout
)
2780 sock
.bind(source_address
)
2783 except socket
.error
as _
:
2785 if sock
is not None:
2790 raise socket
.error('getaddrinfo returns an empty list')
2792 compat_socket_create_connection
= socket
.create_connection
2795 # Fix https://github.com/rg3/youtube-dl/issues/4223
2796 # See http://bugs.python.org/issue9161 for what is broken
2797 def workaround_optparse_bug9161():
2798 op
= optparse
.OptionParser()
2799 og
= optparse
.OptionGroup(op
, 'foo')
2803 real_add_option
= optparse
.OptionGroup
.add_option
2805 def _compat_add_option(self
, *args
, **kwargs
):
2807 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2809 bargs
= [enc(a
) for a
in args
]
2811 (k
, enc(v
)) for k
, v
in kwargs
.items())
2812 return real_add_option(self
, *bargs
, **bkwargs
)
2813 optparse
.OptionGroup
.add_option
= _compat_add_option
2816 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2817 compat_get_terminal_size
= shutil
.get_terminal_size
2819 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2821 def compat_get_terminal_size(fallback
=(80, 24)):
2822 columns
= compat_getenv('COLUMNS')
2824 columns
= int(columns
)
2827 lines
= compat_getenv('LINES')
2833 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2835 sp
= subprocess
.Popen(
2837 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2838 out
, err
= sp
.communicate()
2839 _lines
, _columns
= map(int, out
.split())
2841 _columns
, _lines
= _terminal_size(*fallback
)
2843 if columns
is None or columns
<= 0:
2845 if lines
is None or lines
<= 0:
2847 return _terminal_size(columns
, lines
)
2850 itertools
.count(start
=0, step
=1)
2851 compat_itertools_count
= itertools
.count
2852 except TypeError: # Python 2.6
2853 def compat_itertools_count(start
=0, step
=1):
2859 if sys
.version_info
>= (3, 0):
2860 from tokenize
import tokenize
as compat_tokenize_tokenize
2862 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2866 struct
.pack('!I', 0)
2868 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2869 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """Call struct.pack() with a format that may be given as text.

    A compat_str *spec* is encoded to ASCII bytes before the call; any other
    *spec* is used as is. Returns whatever struct.pack() returns.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def compat_struct_unpack(spec, *args):
    """Call struct.unpack() with a format that may be given as text.

    A compat_str *spec* is encoded to ASCII bytes before the call; any other
    *spec* is used as is. Returns whatever struct.unpack() returns.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(fmt, *args)
2880 compat_struct_pack
= struct
.pack
2881 compat_struct_unpack
= struct
.unpack
2885 'compat_HTMLParser',
2887 'compat_basestring',
2891 'compat_etree_fromstring',
2892 'compat_etree_register_namespace',
2893 'compat_expanduser',
2894 'compat_get_terminal_size',
2897 'compat_html_entities',
2898 'compat_html_entities_html5',
2899 'compat_http_client',
2900 'compat_http_server',
2902 'compat_itertools_count',
2904 'compat_numeric_types',
2910 'compat_shlex_quote',
2911 'compat_shlex_split',
2912 'compat_socket_create_connection',
2914 'compat_struct_pack',
2915 'compat_struct_unpack',
2916 'compat_subprocess_get_DEVNULL',
2917 'compat_tokenize_tokenize',
2918 'compat_urllib_error',
2919 'compat_urllib_parse',
2920 'compat_urllib_parse_unquote',
2921 'compat_urllib_parse_unquote_plus',
2922 'compat_urllib_parse_unquote_to_bytes',
2923 'compat_urllib_parse_urlencode',
2924 'compat_urllib_parse_urlparse',
2925 'compat_urllib_request',
2926 'compat_urllib_request_DataHandler',
2927 'compat_urllib_response',
2929 'compat_urlretrieve',
2930 'compat_xml_parse_error',
2932 'workaround_optparse_bug9161',