1 from __future__
import unicode_literals
18 import xml
.etree
.ElementTree
22 import urllib
.request
as compat_urllib_request
23 except ImportError: # Python 2
24 import urllib2
as compat_urllib_request
27 import urllib
.error
as compat_urllib_error
28 except ImportError: # Python 2
29 import urllib2
as compat_urllib_error
32 import urllib
.parse
as compat_urllib_parse
33 except ImportError: # Python 2
34 import urllib
as compat_urllib_parse
37 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
38 except ImportError: # Python 2
39 from urlparse
import urlparse
as compat_urllib_parse_urlparse
42 import urllib
.parse
as compat_urlparse
43 except ImportError: # Python 2
44 import urlparse
as compat_urlparse
47 import urllib
.response
as compat_urllib_response
48 except ImportError: # Python 2
49 import urllib
as compat_urllib_response
52 import http
.cookiejar
as compat_cookiejar
53 except ImportError: # Python 2
54 import cookielib
as compat_cookiejar
57 import http
.cookies
as compat_cookies
58 except ImportError: # Python 2
59 import Cookie
as compat_cookies
62 import html
.entities
as compat_html_entities
63 except ImportError: # Python 2
64 import htmlentitydefs
as compat_html_entities
67 compat_html_entities_html5
= compat_html_entities
.html5
68 except AttributeError:
69 # Copied from CPython 3.5.1 html/entities.py
70 compat_html_entities_html5
= {
79 'acE;': '\u223e\u0333',
114 'andslope;': '\u2a58',
120 'angmsdaa;': '\u29a8',
121 'angmsdab;': '\u29a9',
122 'angmsdac;': '\u29aa',
123 'angmsdad;': '\u29ab',
124 'angmsdae;': '\u29ac',
125 'angmsdaf;': '\u29ad',
126 'angmsdag;': '\u29ae',
127 'angmsdah;': '\u29af',
129 'angrtvb;': '\u22be',
130 'angrtvbd;': '\u299d',
133 'angzarr;': '\u237c',
136 'Aopf;': '\U0001d538',
137 'aopf;': '\U0001d552',
144 'ApplyFunction;': '\u2061',
146 'approxeq;': '\u224a',
151 'Ascr;': '\U0001d49c',
152 'ascr;': '\U0001d4b6',
156 'asympeq;': '\u224d',
165 'awconint;': '\u2233',
167 'backcong;': '\u224c',
168 'backepsilon;': '\u03f6',
169 'backprime;': '\u2035',
170 'backsim;': '\u223d',
171 'backsimeq;': '\u22cd',
172 'Backslash;': '\u2216',
177 'barwedge;': '\u2305',
179 'bbrktbrk;': '\u23b6',
185 'Because;': '\u2235',
186 'because;': '\u2235',
187 'bemptyv;': '\u29b0',
190 'Bernoullis;': '\u212c',
194 'between;': '\u226c',
195 'Bfr;': '\U0001d505',
196 'bfr;': '\U0001d51f',
198 'bigcirc;': '\u25ef',
200 'bigodot;': '\u2a00',
201 'bigoplus;': '\u2a01',
202 'bigotimes;': '\u2a02',
203 'bigsqcup;': '\u2a06',
204 'bigstar;': '\u2605',
205 'bigtriangledown;': '\u25bd',
206 'bigtriangleup;': '\u25b3',
207 'biguplus;': '\u2a04',
209 'bigwedge;': '\u22c0',
211 'blacklozenge;': '\u29eb',
212 'blacksquare;': '\u25aa',
213 'blacktriangle;': '\u25b4',
214 'blacktriangledown;': '\u25be',
215 'blacktriangleleft;': '\u25c2',
216 'blacktriangleright;': '\u25b8',
223 'bnequiv;': '\u2261\u20e5',
226 'Bopf;': '\U0001d539',
227 'bopf;': '\U0001d553',
250 'boxminus;': '\u229f',
251 'boxplus;': '\u229e',
252 'boxtimes;': '\u22a0',
281 'bscr;': '\U0001d4b7',
287 'bsolhsub;': '\u27c8',
300 'capbrcup;': '\u2a49',
304 'CapitalDifferentialD;': '\u2145',
305 'caps;': '\u2229\ufe00',
308 'Cayleys;': '\u212d',
318 'Cconint;': '\u2230',
320 'ccupssm;': '\u2a50',
326 'cemptyv;': '\u29b2',
329 'CenterDot;': '\xb7',
330 'centerdot;': '\xb7',
332 'cfr;': '\U0001d520',
336 'checkmark;': '\u2713',
342 'circlearrowleft;': '\u21ba',
343 'circlearrowright;': '\u21bb',
344 'circledast;': '\u229b',
345 'circledcirc;': '\u229a',
346 'circleddash;': '\u229d',
347 'CircleDot;': '\u2299',
349 'circledS;': '\u24c8',
350 'CircleMinus;': '\u2296',
351 'CirclePlus;': '\u2295',
352 'CircleTimes;': '\u2297',
355 'cirfnint;': '\u2a10',
357 'cirscir;': '\u29c2',
358 'ClockwiseContourIntegral;': '\u2232',
359 'CloseCurlyDoubleQuote;': '\u201d',
360 'CloseCurlyQuote;': '\u2019',
362 'clubsuit;': '\u2663',
367 'coloneq;': '\u2254',
372 'complement;': '\u2201',
373 'complexes;': '\u2102',
375 'congdot;': '\u2a6d',
376 'Congruent;': '\u2261',
379 'ContourIntegral;': '\u222e',
381 'copf;': '\U0001d554',
383 'Coproduct;': '\u2210',
389 'CounterClockwiseContourIntegral;': '\u2233',
393 'Cscr;': '\U0001d49e',
394 'cscr;': '\U0001d4b8',
400 'cudarrl;': '\u2938',
401 'cudarrr;': '\u2935',
405 'cularrp;': '\u293d',
408 'cupbrcap;': '\u2a48',
414 'cups;': '\u222a\ufe00',
416 'curarrm;': '\u293c',
417 'curlyeqprec;': '\u22de',
418 'curlyeqsucc;': '\u22df',
419 'curlyvee;': '\u22ce',
420 'curlywedge;': '\u22cf',
423 'curvearrowleft;': '\u21b6',
424 'curvearrowright;': '\u21b7',
427 'cwconint;': '\u2232',
439 'dbkarow;': '\u290f',
447 'ddagger;': '\u2021',
449 'DDotrahd;': '\u2911',
450 'ddotseq;': '\u2a77',
456 'demptyv;': '\u29b1',
458 'Dfr;': '\U0001d507',
459 'dfr;': '\U0001d521',
463 'DiacriticalAcute;': '\xb4',
464 'DiacriticalDot;': '\u02d9',
465 'DiacriticalDoubleAcute;': '\u02dd',
466 'DiacriticalGrave;': '`',
467 'DiacriticalTilde;': '\u02dc',
469 'Diamond;': '\u22c4',
470 'diamond;': '\u22c4',
471 'diamondsuit;': '\u2666',
474 'DifferentialD;': '\u2146',
475 'digamma;': '\u03dd',
480 'divideontimes;': '\u22c7',
487 'Dopf;': '\U0001d53b',
488 'dopf;': '\U0001d555',
493 'doteqdot;': '\u2251',
494 'DotEqual;': '\u2250',
495 'dotminus;': '\u2238',
496 'dotplus;': '\u2214',
497 'dotsquare;': '\u22a1',
498 'doublebarwedge;': '\u2306',
499 'DoubleContourIntegral;': '\u222f',
500 'DoubleDot;': '\xa8',
501 'DoubleDownArrow;': '\u21d3',
502 'DoubleLeftArrow;': '\u21d0',
503 'DoubleLeftRightArrow;': '\u21d4',
504 'DoubleLeftTee;': '\u2ae4',
505 'DoubleLongLeftArrow;': '\u27f8',
506 'DoubleLongLeftRightArrow;': '\u27fa',
507 'DoubleLongRightArrow;': '\u27f9',
508 'DoubleRightArrow;': '\u21d2',
509 'DoubleRightTee;': '\u22a8',
510 'DoubleUpArrow;': '\u21d1',
511 'DoubleUpDownArrow;': '\u21d5',
512 'DoubleVerticalBar;': '\u2225',
513 'DownArrow;': '\u2193',
514 'Downarrow;': '\u21d3',
515 'downarrow;': '\u2193',
516 'DownArrowBar;': '\u2913',
517 'DownArrowUpArrow;': '\u21f5',
518 'DownBreve;': '\u0311',
519 'downdownarrows;': '\u21ca',
520 'downharpoonleft;': '\u21c3',
521 'downharpoonright;': '\u21c2',
522 'DownLeftRightVector;': '\u2950',
523 'DownLeftTeeVector;': '\u295e',
524 'DownLeftVector;': '\u21bd',
525 'DownLeftVectorBar;': '\u2956',
526 'DownRightTeeVector;': '\u295f',
527 'DownRightVector;': '\u21c1',
528 'DownRightVectorBar;': '\u2957',
529 'DownTee;': '\u22a4',
530 'DownTeeArrow;': '\u21a7',
531 'drbkarow;': '\u2910',
534 'Dscr;': '\U0001d49f',
535 'dscr;': '\U0001d4b9',
546 'dwangle;': '\u29a6',
549 'dzigrarr;': '\u27ff',
571 'Efr;': '\U0001d508',
572 'efr;': '\U0001d522',
581 'Element;': '\u2208',
582 'elinters;': '\u23e7',
589 'emptyset;': '\u2205',
590 'EmptySmallSquare;': '\u25fb',
592 'EmptyVerySmallSquare;': '\u25ab',
601 'Eopf;': '\U0001d53c',
602 'eopf;': '\U0001d556',
607 'Epsilon;': '\u0395',
608 'epsilon;': '\u03b5',
611 'eqcolon;': '\u2255',
613 'eqslantgtr;': '\u2a96',
614 'eqslantless;': '\u2a95',
617 'EqualTilde;': '\u2242',
619 'Equilibrium;': '\u21cc',
621 'equivDD;': '\u2a78',
622 'eqvparsl;': '\u29e5',
644 'expectation;': '\u2130',
645 'ExponentialE;': '\u2147',
646 'exponentiale;': '\u2147',
647 'fallingdotseq;': '\u2252',
654 'Ffr;': '\U0001d509',
655 'ffr;': '\U0001d523',
657 'FilledSmallSquare;': '\u25fc',
658 'FilledVerySmallSquare;': '\u25aa',
664 'Fopf;': '\U0001d53d',
665 'fopf;': '\U0001d557',
670 'Fouriertrf;': '\u2131',
671 'fpartint;': '\u2a0d',
693 'fscr;': '\U0001d4bb',
715 'geqslant;': '\u2a7e',
719 'gesdoto;': '\u2a82',
720 'gesdotol;': '\u2a84',
721 'gesl;': '\u22db\ufe00',
723 'Gfr;': '\U0001d50a',
724 'gfr;': '\U0001d524',
736 'gnapprox;': '\u2a8a',
742 'Gopf;': '\U0001d53e',
743 'gopf;': '\U0001d558',
745 'GreaterEqual;': '\u2265',
746 'GreaterEqualLess;': '\u22db',
747 'GreaterFullEqual;': '\u2267',
748 'GreaterGreater;': '\u2aa2',
749 'GreaterLess;': '\u2277',
750 'GreaterSlantEqual;': '\u2a7e',
751 'GreaterTilde;': '\u2273',
752 'Gscr;': '\U0001d4a2',
766 'gtquest;': '\u2a7c',
767 'gtrapprox;': '\u2a86',
770 'gtreqless;': '\u22db',
771 'gtreqqless;': '\u2a8c',
772 'gtrless;': '\u2277',
774 'gvertneqq;': '\u2269\ufe00',
775 'gvnE;': '\u2269\ufe00',
784 'harrcir;': '\u2948',
791 'heartsuit;': '\u2665',
795 'hfr;': '\U0001d525',
796 'HilbertSpace;': '\u210b',
797 'hksearow;': '\u2925',
798 'hkswarow;': '\u2926',
801 'hookleftarrow;': '\u21a9',
802 'hookrightarrow;': '\u21aa',
804 'hopf;': '\U0001d559',
806 'HorizontalLine;': '\u2500',
808 'hscr;': '\U0001d4bd',
812 'HumpDownHump;': '\u224e',
813 'HumpEqual;': '\u224f',
834 'ifr;': '\U0001d526',
850 'ImaginaryI;': '\u2148',
851 'imagline;': '\u2110',
852 'imagpart;': '\u2111',
856 'Implies;': '\u21d2',
860 'infintie;': '\u29dd',
865 'integers;': '\u2124',
866 'Integral;': '\u222b',
867 'intercal;': '\u22ba',
868 'Intersection;': '\u22c2',
869 'intlarhk;': '\u2a17',
870 'intprod;': '\u2a3c',
871 'InvisibleComma;': '\u2063',
872 'InvisibleTimes;': '\u2062',
877 'Iopf;': '\U0001d540',
878 'iopf;': '\U0001d55a',
885 'iscr;': '\U0001d4be',
887 'isindot;': '\u22f5',
905 'Jfr;': '\U0001d50d',
906 'jfr;': '\U0001d527',
908 'Jopf;': '\U0001d541',
909 'jopf;': '\U0001d55b',
910 'Jscr;': '\U0001d4a5',
911 'jscr;': '\U0001d4bf',
923 'Kfr;': '\U0001d50e',
924 'kfr;': '\U0001d528',
930 'Kopf;': '\U0001d542',
931 'kopf;': '\U0001d55c',
932 'Kscr;': '\U0001d4a6',
933 'kscr;': '\U0001d4c0',
937 'laemptyv;': '\u29b4',
946 'Laplacetrf;': '\u2112',
953 'larrbfs;': '\u291f',
958 'larrsim;': '\u2973',
964 'lates;': '\u2aad\ufe00',
971 'lbrksld;': '\u298f',
972 'lbrkslu;': '\u298d',
984 'ldrdhar;': '\u2967',
985 'ldrushar;': '\u294b',
989 'LeftAngleBracket;': '\u27e8',
990 'LeftArrow;': '\u2190',
991 'Leftarrow;': '\u21d0',
992 'leftarrow;': '\u2190',
993 'LeftArrowBar;': '\u21e4',
994 'LeftArrowRightArrow;': '\u21c6',
995 'leftarrowtail;': '\u21a2',
996 'LeftCeiling;': '\u2308',
997 'LeftDoubleBracket;': '\u27e6',
998 'LeftDownTeeVector;': '\u2961',
999 'LeftDownVector;': '\u21c3',
1000 'LeftDownVectorBar;': '\u2959',
1001 'LeftFloor;': '\u230a',
1002 'leftharpoondown;': '\u21bd',
1003 'leftharpoonup;': '\u21bc',
1004 'leftleftarrows;': '\u21c7',
1005 'LeftRightArrow;': '\u2194',
1006 'Leftrightarrow;': '\u21d4',
1007 'leftrightarrow;': '\u2194',
1008 'leftrightarrows;': '\u21c6',
1009 'leftrightharpoons;': '\u21cb',
1010 'leftrightsquigarrow;': '\u21ad',
1011 'LeftRightVector;': '\u294e',
1012 'LeftTee;': '\u22a3',
1013 'LeftTeeArrow;': '\u21a4',
1014 'LeftTeeVector;': '\u295a',
1015 'leftthreetimes;': '\u22cb',
1016 'LeftTriangle;': '\u22b2',
1017 'LeftTriangleBar;': '\u29cf',
1018 'LeftTriangleEqual;': '\u22b4',
1019 'LeftUpDownVector;': '\u2951',
1020 'LeftUpTeeVector;': '\u2960',
1021 'LeftUpVector;': '\u21bf',
1022 'LeftUpVectorBar;': '\u2958',
1023 'LeftVector;': '\u21bc',
1024 'LeftVectorBar;': '\u2952',
1029 'leqslant;': '\u2a7d',
1032 'lesdot;': '\u2a7f',
1033 'lesdoto;': '\u2a81',
1034 'lesdotor;': '\u2a83',
1035 'lesg;': '\u22da\ufe00',
1036 'lesges;': '\u2a93',
1037 'lessapprox;': '\u2a85',
1038 'lessdot;': '\u22d6',
1039 'lesseqgtr;': '\u22da',
1040 'lesseqqgtr;': '\u2a8b',
1041 'LessEqualGreater;': '\u22da',
1042 'LessFullEqual;': '\u2266',
1043 'LessGreater;': '\u2276',
1044 'lessgtr;': '\u2276',
1045 'LessLess;': '\u2aa1',
1046 'lesssim;': '\u2272',
1047 'LessSlantEqual;': '\u2a7d',
1048 'LessTilde;': '\u2272',
1049 'lfisht;': '\u297c',
1050 'lfloor;': '\u230a',
1051 'Lfr;': '\U0001d50f',
1052 'lfr;': '\U0001d529',
1058 'lharul;': '\u296a',
1065 'llcorner;': '\u231e',
1066 'Lleftarrow;': '\u21da',
1067 'llhard;': '\u296b',
1069 'Lmidot;': '\u013f',
1070 'lmidot;': '\u0140',
1071 'lmoust;': '\u23b0',
1072 'lmoustache;': '\u23b0',
1074 'lnapprox;': '\u2a89',
1083 'LongLeftArrow;': '\u27f5',
1084 'Longleftarrow;': '\u27f8',
1085 'longleftarrow;': '\u27f5',
1086 'LongLeftRightArrow;': '\u27f7',
1087 'Longleftrightarrow;': '\u27fa',
1088 'longleftrightarrow;': '\u27f7',
1089 'longmapsto;': '\u27fc',
1090 'LongRightArrow;': '\u27f6',
1091 'Longrightarrow;': '\u27f9',
1092 'longrightarrow;': '\u27f6',
1093 'looparrowleft;': '\u21ab',
1094 'looparrowright;': '\u21ac',
1096 'Lopf;': '\U0001d543',
1097 'lopf;': '\U0001d55d',
1098 'loplus;': '\u2a2d',
1099 'lotimes;': '\u2a34',
1100 'lowast;': '\u2217',
1102 'LowerLeftArrow;': '\u2199',
1103 'LowerRightArrow;': '\u2198',
1105 'lozenge;': '\u25ca',
1108 'lparlt;': '\u2993',
1110 'lrcorner;': '\u231f',
1112 'lrhard;': '\u296d',
1115 'lsaquo;': '\u2039',
1117 'lscr;': '\U0001d4c1',
1125 'lsquor;': '\u201a',
1126 'Lstrok;': '\u0141',
1127 'lstrok;': '\u0142',
1136 'lthree;': '\u22cb',
1137 'ltimes;': '\u22c9',
1138 'ltlarr;': '\u2976',
1139 'ltquest;': '\u2a7b',
1143 'ltrPar;': '\u2996',
1144 'lurdshar;': '\u294a',
1145 'luruhar;': '\u2966',
1146 'lvertneqq;': '\u2268\ufe00',
1147 'lvnE;': '\u2268\ufe00',
1152 'maltese;': '\u2720',
1155 'mapsto;': '\u21a6',
1156 'mapstodown;': '\u21a7',
1157 'mapstoleft;': '\u21a4',
1158 'mapstoup;': '\u21a5',
1159 'marker;': '\u25ae',
1160 'mcomma;': '\u2a29',
1165 'measuredangle;': '\u2221',
1166 'MediumSpace;': '\u205f',
1167 'Mellintrf;': '\u2133',
1168 'Mfr;': '\U0001d510',
1169 'mfr;': '\U0001d52a',
1175 'midcir;': '\u2af0',
1179 'minusb;': '\u229f',
1180 'minusd;': '\u2238',
1181 'minusdu;': '\u2a2a',
1182 'MinusPlus;': '\u2213',
1185 'mnplus;': '\u2213',
1186 'models;': '\u22a7',
1187 'Mopf;': '\U0001d544',
1188 'mopf;': '\U0001d55e',
1191 'mscr;': '\U0001d4c2',
1192 'mstpos;': '\u223e',
1195 'multimap;': '\u22b8',
1198 'Nacute;': '\u0143',
1199 'nacute;': '\u0144',
1200 'nang;': '\u2220\u20d2',
1202 'napE;': '\u2a70\u0338',
1203 'napid;': '\u224b\u0338',
1205 'napprox;': '\u2249',
1207 'natural;': '\u266e',
1208 'naturals;': '\u2115',
1211 'nbump;': '\u224e\u0338',
1212 'nbumpe;': '\u224f\u0338',
1214 'Ncaron;': '\u0147',
1215 'ncaron;': '\u0148',
1216 'Ncedil;': '\u0145',
1217 'ncedil;': '\u0146',
1219 'ncongdot;': '\u2a6d\u0338',
1225 'nearhk;': '\u2924',
1228 'nearrow;': '\u2197',
1229 'nedot;': '\u2250\u0338',
1230 'NegativeMediumSpace;': '\u200b',
1231 'NegativeThickSpace;': '\u200b',
1232 'NegativeThinSpace;': '\u200b',
1233 'NegativeVeryThinSpace;': '\u200b',
1234 'nequiv;': '\u2262',
1235 'nesear;': '\u2928',
1236 'nesim;': '\u2242\u0338',
1237 'NestedGreaterGreater;': '\u226b',
1238 'NestedLessLess;': '\u226a',
1240 'nexist;': '\u2204',
1241 'nexists;': '\u2204',
1242 'Nfr;': '\U0001d511',
1243 'nfr;': '\U0001d52b',
1244 'ngE;': '\u2267\u0338',
1247 'ngeqq;': '\u2267\u0338',
1248 'ngeqslant;': '\u2a7e\u0338',
1249 'nges;': '\u2a7e\u0338',
1250 'nGg;': '\u22d9\u0338',
1252 'nGt;': '\u226b\u20d2',
1255 'nGtv;': '\u226b\u0338',
1268 'nlE;': '\u2266\u0338',
1270 'nLeftarrow;': '\u21cd',
1271 'nleftarrow;': '\u219a',
1272 'nLeftrightarrow;': '\u21ce',
1273 'nleftrightarrow;': '\u21ae',
1275 'nleqq;': '\u2266\u0338',
1276 'nleqslant;': '\u2a7d\u0338',
1277 'nles;': '\u2a7d\u0338',
1279 'nLl;': '\u22d8\u0338',
1281 'nLt;': '\u226a\u20d2',
1284 'nltrie;': '\u22ec',
1285 'nLtv;': '\u226a\u0338',
1287 'NoBreak;': '\u2060',
1288 'NonBreakingSpace;': '\xa0',
1290 'nopf;': '\U0001d55f',
1294 'NotCongruent;': '\u2262',
1295 'NotCupCap;': '\u226d',
1296 'NotDoubleVerticalBar;': '\u2226',
1297 'NotElement;': '\u2209',
1298 'NotEqual;': '\u2260',
1299 'NotEqualTilde;': '\u2242\u0338',
1300 'NotExists;': '\u2204',
1301 'NotGreater;': '\u226f',
1302 'NotGreaterEqual;': '\u2271',
1303 'NotGreaterFullEqual;': '\u2267\u0338',
1304 'NotGreaterGreater;': '\u226b\u0338',
1305 'NotGreaterLess;': '\u2279',
1306 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1307 'NotGreaterTilde;': '\u2275',
1308 'NotHumpDownHump;': '\u224e\u0338',
1309 'NotHumpEqual;': '\u224f\u0338',
1311 'notindot;': '\u22f5\u0338',
1312 'notinE;': '\u22f9\u0338',
1313 'notinva;': '\u2209',
1314 'notinvb;': '\u22f7',
1315 'notinvc;': '\u22f6',
1316 'NotLeftTriangle;': '\u22ea',
1317 'NotLeftTriangleBar;': '\u29cf\u0338',
1318 'NotLeftTriangleEqual;': '\u22ec',
1319 'NotLess;': '\u226e',
1320 'NotLessEqual;': '\u2270',
1321 'NotLessGreater;': '\u2278',
1322 'NotLessLess;': '\u226a\u0338',
1323 'NotLessSlantEqual;': '\u2a7d\u0338',
1324 'NotLessTilde;': '\u2274',
1325 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1326 'NotNestedLessLess;': '\u2aa1\u0338',
1328 'notniva;': '\u220c',
1329 'notnivb;': '\u22fe',
1330 'notnivc;': '\u22fd',
1331 'NotPrecedes;': '\u2280',
1332 'NotPrecedesEqual;': '\u2aaf\u0338',
1333 'NotPrecedesSlantEqual;': '\u22e0',
1334 'NotReverseElement;': '\u220c',
1335 'NotRightTriangle;': '\u22eb',
1336 'NotRightTriangleBar;': '\u29d0\u0338',
1337 'NotRightTriangleEqual;': '\u22ed',
1338 'NotSquareSubset;': '\u228f\u0338',
1339 'NotSquareSubsetEqual;': '\u22e2',
1340 'NotSquareSuperset;': '\u2290\u0338',
1341 'NotSquareSupersetEqual;': '\u22e3',
1342 'NotSubset;': '\u2282\u20d2',
1343 'NotSubsetEqual;': '\u2288',
1344 'NotSucceeds;': '\u2281',
1345 'NotSucceedsEqual;': '\u2ab0\u0338',
1346 'NotSucceedsSlantEqual;': '\u22e1',
1347 'NotSucceedsTilde;': '\u227f\u0338',
1348 'NotSuperset;': '\u2283\u20d2',
1349 'NotSupersetEqual;': '\u2289',
1350 'NotTilde;': '\u2241',
1351 'NotTildeEqual;': '\u2244',
1352 'NotTildeFullEqual;': '\u2247',
1353 'NotTildeTilde;': '\u2249',
1354 'NotVerticalBar;': '\u2224',
1356 'nparallel;': '\u2226',
1357 'nparsl;': '\u2afd\u20e5',
1358 'npart;': '\u2202\u0338',
1359 'npolint;': '\u2a14',
1361 'nprcue;': '\u22e0',
1362 'npre;': '\u2aaf\u0338',
1364 'npreceq;': '\u2aaf\u0338',
1367 'nrarrc;': '\u2933\u0338',
1368 'nrarrw;': '\u219d\u0338',
1369 'nRightarrow;': '\u21cf',
1370 'nrightarrow;': '\u219b',
1372 'nrtrie;': '\u22ed',
1374 'nsccue;': '\u22e1',
1375 'nsce;': '\u2ab0\u0338',
1376 'Nscr;': '\U0001d4a9',
1377 'nscr;': '\U0001d4c3',
1378 'nshortmid;': '\u2224',
1379 'nshortparallel;': '\u2226',
1382 'nsimeq;': '\u2244',
1385 'nsqsube;': '\u22e2',
1386 'nsqsupe;': '\u22e3',
1388 'nsubE;': '\u2ac5\u0338',
1390 'nsubset;': '\u2282\u20d2',
1391 'nsubseteq;': '\u2288',
1392 'nsubseteqq;': '\u2ac5\u0338',
1394 'nsucceq;': '\u2ab0\u0338',
1396 'nsupE;': '\u2ac6\u0338',
1398 'nsupset;': '\u2283\u20d2',
1399 'nsupseteq;': '\u2289',
1400 'nsupseteqq;': '\u2ac6\u0338',
1407 'ntriangleleft;': '\u22ea',
1408 'ntrianglelefteq;': '\u22ec',
1409 'ntriangleright;': '\u22eb',
1410 'ntrianglerighteq;': '\u22ed',
1414 'numero;': '\u2116',
1416 'nvap;': '\u224d\u20d2',
1417 'nVDash;': '\u22af',
1418 'nVdash;': '\u22ae',
1419 'nvDash;': '\u22ad',
1420 'nvdash;': '\u22ac',
1421 'nvge;': '\u2265\u20d2',
1423 'nvHarr;': '\u2904',
1424 'nvinfin;': '\u29de',
1425 'nvlArr;': '\u2902',
1426 'nvle;': '\u2264\u20d2',
1428 'nvltrie;': '\u22b4\u20d2',
1429 'nvrArr;': '\u2903',
1430 'nvrtrie;': '\u22b5\u20d2',
1431 'nvsim;': '\u223c\u20d2',
1432 'nwarhk;': '\u2923',
1435 'nwarrow;': '\u2196',
1436 'nwnear;': '\u2927',
1450 'Odblac;': '\u0150',
1451 'odblac;': '\u0151',
1454 'odsold;': '\u29bc',
1458 'Ofr;': '\U0001d512',
1459 'ofr;': '\U0001d52c',
1471 'olcross;': '\u29bb',
1478 'Omicron;': '\u039f',
1479 'omicron;': '\u03bf',
1481 'ominus;': '\u2296',
1482 'Oopf;': '\U0001d546',
1483 'oopf;': '\U0001d560',
1485 'OpenCurlyDoubleQuote;': '\u201c',
1486 'OpenCurlyQuote;': '\u2018',
1494 'orderof;': '\u2134',
1499 'origof;': '\u22b6',
1501 'orslope;': '\u2a57',
1504 'Oscr;': '\U0001d4aa',
1515 'Otimes;': '\u2a37',
1516 'otimes;': '\u2297',
1517 'otimesas;': '\u2a36',
1523 'OverBar;': '\u203e',
1524 'OverBrace;': '\u23de',
1525 'OverBracket;': '\u23b4',
1526 'OverParenthesis;': '\u23dc',
1530 'parallel;': '\u2225',
1531 'parsim;': '\u2af3',
1534 'PartialD;': '\u2202',
1539 'permil;': '\u2030',
1541 'pertenk;': '\u2031',
1542 'Pfr;': '\U0001d513',
1543 'pfr;': '\U0001d52d',
1547 'phmmat;': '\u2133',
1551 'pitchfork;': '\u22d4',
1553 'planck;': '\u210f',
1554 'planckh;': '\u210e',
1555 'plankv;': '\u210f',
1557 'plusacir;': '\u2a23',
1559 'pluscir;': '\u2a22',
1560 'plusdo;': '\u2214',
1561 'plusdu;': '\u2a25',
1563 'PlusMinus;': '\xb1',
1566 'plussim;': '\u2a26',
1567 'plustwo;': '\u2a27',
1569 'Poincareplane;': '\u210c',
1570 'pointint;': '\u2a15',
1572 'popf;': '\U0001d561',
1582 'precapprox;': '\u2ab7',
1583 'preccurlyeq;': '\u227c',
1584 'Precedes;': '\u227a',
1585 'PrecedesEqual;': '\u2aaf',
1586 'PrecedesSlantEqual;': '\u227c',
1587 'PrecedesTilde;': '\u227e',
1588 'preceq;': '\u2aaf',
1589 'precnapprox;': '\u2ab9',
1590 'precneqq;': '\u2ab5',
1591 'precnsim;': '\u22e8',
1592 'precsim;': '\u227e',
1595 'primes;': '\u2119',
1598 'prnsim;': '\u22e8',
1600 'Product;': '\u220f',
1601 'profalar;': '\u232e',
1602 'profline;': '\u2312',
1603 'profsurf;': '\u2313',
1605 'Proportion;': '\u2237',
1606 'Proportional;': '\u221d',
1607 'propto;': '\u221d',
1609 'prurel;': '\u22b0',
1610 'Pscr;': '\U0001d4ab',
1611 'pscr;': '\U0001d4c5',
1614 'puncsp;': '\u2008',
1615 'Qfr;': '\U0001d514',
1616 'qfr;': '\U0001d52e',
1619 'qopf;': '\U0001d562',
1620 'qprime;': '\u2057',
1621 'Qscr;': '\U0001d4ac',
1622 'qscr;': '\U0001d4c6',
1623 'quaternions;': '\u210d',
1624 'quatint;': '\u2a16',
1626 'questeq;': '\u225f',
1632 'race;': '\u223d\u0331',
1633 'Racute;': '\u0154',
1634 'racute;': '\u0155',
1636 'raemptyv;': '\u29b3',
1641 'rangle;': '\u27e9',
1647 'rarrap;': '\u2975',
1649 'rarrbfs;': '\u2920',
1651 'rarrfs;': '\u291e',
1652 'rarrhk;': '\u21aa',
1653 'rarrlp;': '\u21ac',
1654 'rarrpl;': '\u2945',
1655 'rarrsim;': '\u2974',
1656 'Rarrtl;': '\u2916',
1657 'rarrtl;': '\u21a3',
1659 'rAtail;': '\u291c',
1660 'ratail;': '\u291a',
1662 'rationals;': '\u211a',
1670 'rbrksld;': '\u298e',
1671 'rbrkslu;': '\u2990',
1672 'Rcaron;': '\u0158',
1673 'rcaron;': '\u0159',
1674 'Rcedil;': '\u0156',
1675 'rcedil;': '\u0157',
1681 'rdldhar;': '\u2969',
1683 'rdquor;': '\u201d',
1687 'realine;': '\u211b',
1688 'realpart;': '\u211c',
1695 'ReverseElement;': '\u220b',
1696 'ReverseEquilibrium;': '\u21cb',
1697 'ReverseUpEquilibrium;': '\u296f',
1698 'rfisht;': '\u297d',
1699 'rfloor;': '\u230b',
1701 'rfr;': '\U0001d52f',
1705 'rharul;': '\u296c',
1709 'RightAngleBracket;': '\u27e9',
1710 'RightArrow;': '\u2192',
1711 'Rightarrow;': '\u21d2',
1712 'rightarrow;': '\u2192',
1713 'RightArrowBar;': '\u21e5',
1714 'RightArrowLeftArrow;': '\u21c4',
1715 'rightarrowtail;': '\u21a3',
1716 'RightCeiling;': '\u2309',
1717 'RightDoubleBracket;': '\u27e7',
1718 'RightDownTeeVector;': '\u295d',
1719 'RightDownVector;': '\u21c2',
1720 'RightDownVectorBar;': '\u2955',
1721 'RightFloor;': '\u230b',
1722 'rightharpoondown;': '\u21c1',
1723 'rightharpoonup;': '\u21c0',
1724 'rightleftarrows;': '\u21c4',
1725 'rightleftharpoons;': '\u21cc',
1726 'rightrightarrows;': '\u21c9',
1727 'rightsquigarrow;': '\u219d',
1728 'RightTee;': '\u22a2',
1729 'RightTeeArrow;': '\u21a6',
1730 'RightTeeVector;': '\u295b',
1731 'rightthreetimes;': '\u22cc',
1732 'RightTriangle;': '\u22b3',
1733 'RightTriangleBar;': '\u29d0',
1734 'RightTriangleEqual;': '\u22b5',
1735 'RightUpDownVector;': '\u294f',
1736 'RightUpTeeVector;': '\u295c',
1737 'RightUpVector;': '\u21be',
1738 'RightUpVectorBar;': '\u2954',
1739 'RightVector;': '\u21c0',
1740 'RightVectorBar;': '\u2953',
1742 'risingdotseq;': '\u2253',
1746 'rmoust;': '\u23b1',
1747 'rmoustache;': '\u23b1',
1754 'ropf;': '\U0001d563',
1755 'roplus;': '\u2a2e',
1756 'rotimes;': '\u2a35',
1757 'RoundImplies;': '\u2970',
1759 'rpargt;': '\u2994',
1760 'rppolint;': '\u2a12',
1762 'Rrightarrow;': '\u21db',
1763 'rsaquo;': '\u203a',
1765 'rscr;': '\U0001d4c7',
1770 'rsquor;': '\u2019',
1771 'rthree;': '\u22cc',
1772 'rtimes;': '\u22ca',
1776 'rtriltri;': '\u29ce',
1777 'RuleDelayed;': '\u29f4',
1778 'ruluhar;': '\u2968',
1780 'Sacute;': '\u015a',
1781 'sacute;': '\u015b',
1786 'Scaron;': '\u0160',
1787 'scaron;': '\u0161',
1791 'Scedil;': '\u015e',
1792 'scedil;': '\u015f',
1797 'scnsim;': '\u22e9',
1798 'scpolint;': '\u2a13',
1805 'searhk;': '\u2925',
1808 'searrow;': '\u2198',
1812 'seswar;': '\u2929',
1813 'setminus;': '\u2216',
1816 'Sfr;': '\U0001d516',
1817 'sfr;': '\U0001d530',
1818 'sfrown;': '\u2322',
1820 'SHCHcy;': '\u0429',
1821 'shchcy;': '\u0449',
1824 'ShortDownArrow;': '\u2193',
1825 'ShortLeftArrow;': '\u2190',
1826 'shortmid;': '\u2223',
1827 'shortparallel;': '\u2225',
1828 'ShortRightArrow;': '\u2192',
1829 'ShortUpArrow;': '\u2191',
1834 'sigmaf;': '\u03c2',
1835 'sigmav;': '\u03c2',
1837 'simdot;': '\u2a6a',
1845 'simplus;': '\u2a24',
1846 'simrarr;': '\u2972',
1848 'SmallCircle;': '\u2218',
1849 'smallsetminus;': '\u2216',
1850 'smashp;': '\u2a33',
1851 'smeparsl;': '\u29e4',
1856 'smtes;': '\u2aac\ufe00',
1857 'SOFTcy;': '\u042c',
1858 'softcy;': '\u044c',
1861 'solbar;': '\u233f',
1862 'Sopf;': '\U0001d54a',
1863 'sopf;': '\U0001d564',
1864 'spades;': '\u2660',
1865 'spadesuit;': '\u2660',
1868 'sqcaps;': '\u2293\ufe00',
1870 'sqcups;': '\u2294\ufe00',
1873 'sqsube;': '\u2291',
1874 'sqsubset;': '\u228f',
1875 'sqsubseteq;': '\u2291',
1877 'sqsupe;': '\u2292',
1878 'sqsupset;': '\u2290',
1879 'sqsupseteq;': '\u2292',
1881 'Square;': '\u25a1',
1882 'square;': '\u25a1',
1883 'SquareIntersection;': '\u2293',
1884 'SquareSubset;': '\u228f',
1885 'SquareSubsetEqual;': '\u2291',
1886 'SquareSuperset;': '\u2290',
1887 'SquareSupersetEqual;': '\u2292',
1888 'SquareUnion;': '\u2294',
1889 'squarf;': '\u25aa',
1892 'Sscr;': '\U0001d4ae',
1893 'sscr;': '\U0001d4c8',
1894 'ssetmn;': '\u2216',
1895 'ssmile;': '\u2323',
1896 'sstarf;': '\u22c6',
1900 'straightepsilon;': '\u03f5',
1901 'straightphi;': '\u03d5',
1905 'subdot;': '\u2abd',
1908 'subedot;': '\u2ac3',
1909 'submult;': '\u2ac1',
1912 'subplus;': '\u2abf',
1913 'subrarr;': '\u2979',
1914 'Subset;': '\u22d0',
1915 'subset;': '\u2282',
1916 'subseteq;': '\u2286',
1917 'subseteqq;': '\u2ac5',
1918 'SubsetEqual;': '\u2286',
1919 'subsetneq;': '\u228a',
1920 'subsetneqq;': '\u2acb',
1921 'subsim;': '\u2ac7',
1922 'subsub;': '\u2ad5',
1923 'subsup;': '\u2ad3',
1925 'succapprox;': '\u2ab8',
1926 'succcurlyeq;': '\u227d',
1927 'Succeeds;': '\u227b',
1928 'SucceedsEqual;': '\u2ab0',
1929 'SucceedsSlantEqual;': '\u227d',
1930 'SucceedsTilde;': '\u227f',
1931 'succeq;': '\u2ab0',
1932 'succnapprox;': '\u2aba',
1933 'succneqq;': '\u2ab6',
1934 'succnsim;': '\u22e9',
1935 'succsim;': '\u227f',
1936 'SuchThat;': '\u220b',
1948 'supdot;': '\u2abe',
1949 'supdsub;': '\u2ad8',
1952 'supedot;': '\u2ac4',
1953 'Superset;': '\u2283',
1954 'SupersetEqual;': '\u2287',
1955 'suphsol;': '\u27c9',
1956 'suphsub;': '\u2ad7',
1957 'suplarr;': '\u297b',
1958 'supmult;': '\u2ac2',
1961 'supplus;': '\u2ac0',
1962 'Supset;': '\u22d1',
1963 'supset;': '\u2283',
1964 'supseteq;': '\u2287',
1965 'supseteqq;': '\u2ac6',
1966 'supsetneq;': '\u228b',
1967 'supsetneqq;': '\u2acc',
1968 'supsim;': '\u2ac8',
1969 'supsub;': '\u2ad4',
1970 'supsup;': '\u2ad6',
1971 'swarhk;': '\u2926',
1974 'swarrow;': '\u2199',
1975 'swnwar;': '\u292a',
1979 'target;': '\u2316',
1983 'Tcaron;': '\u0164',
1984 'tcaron;': '\u0165',
1985 'Tcedil;': '\u0162',
1986 'tcedil;': '\u0163',
1990 'telrec;': '\u2315',
1991 'Tfr;': '\U0001d517',
1992 'tfr;': '\U0001d531',
1993 'there4;': '\u2234',
1994 'Therefore;': '\u2234',
1995 'therefore;': '\u2234',
1998 'thetasym;': '\u03d1',
1999 'thetav;': '\u03d1',
2000 'thickapprox;': '\u2248',
2001 'thicksim;': '\u223c',
2002 'ThickSpace;': '\u205f\u200a',
2003 'thinsp;': '\u2009',
2004 'ThinSpace;': '\u2009',
2006 'thksim;': '\u223c',
2013 'TildeEqual;': '\u2243',
2014 'TildeFullEqual;': '\u2245',
2015 'TildeTilde;': '\u2248',
2018 'timesb;': '\u22a0',
2019 'timesbar;': '\u2a31',
2020 'timesd;': '\u2a30',
2024 'topbot;': '\u2336',
2025 'topcir;': '\u2af1',
2026 'Topf;': '\U0001d54b',
2027 'topf;': '\U0001d565',
2028 'topfork;': '\u2ada',
2030 'tprime;': '\u2034',
2033 'triangle;': '\u25b5',
2034 'triangledown;': '\u25bf',
2035 'triangleleft;': '\u25c3',
2036 'trianglelefteq;': '\u22b4',
2037 'triangleq;': '\u225c',
2038 'triangleright;': '\u25b9',
2039 'trianglerighteq;': '\u22b5',
2040 'tridot;': '\u25ec',
2042 'triminus;': '\u2a3a',
2043 'TripleDot;': '\u20db',
2044 'triplus;': '\u2a39',
2046 'tritime;': '\u2a3b',
2047 'trpezium;': '\u23e2',
2048 'Tscr;': '\U0001d4af',
2049 'tscr;': '\U0001d4c9',
2054 'Tstrok;': '\u0166',
2055 'tstrok;': '\u0167',
2057 'twoheadleftarrow;': '\u219e',
2058 'twoheadrightarrow;': '\u21a0',
2066 'Uarrocir;': '\u2949',
2069 'Ubreve;': '\u016c',
2070 'ubreve;': '\u016d',
2078 'Udblac;': '\u0170',
2079 'udblac;': '\u0171',
2081 'ufisht;': '\u297e',
2082 'Ufr;': '\U0001d518',
2083 'ufr;': '\U0001d532',
2092 'ulcorn;': '\u231c',
2093 'ulcorner;': '\u231c',
2094 'ulcrop;': '\u230f',
2101 'UnderBrace;': '\u23df',
2102 'UnderBracket;': '\u23b5',
2103 'UnderParenthesis;': '\u23dd',
2105 'UnionPlus;': '\u228e',
2108 'Uopf;': '\U0001d54c',
2109 'uopf;': '\U0001d566',
2110 'UpArrow;': '\u2191',
2111 'Uparrow;': '\u21d1',
2112 'uparrow;': '\u2191',
2113 'UpArrowBar;': '\u2912',
2114 'UpArrowDownArrow;': '\u21c5',
2115 'UpDownArrow;': '\u2195',
2116 'Updownarrow;': '\u21d5',
2117 'updownarrow;': '\u2195',
2118 'UpEquilibrium;': '\u296e',
2119 'upharpoonleft;': '\u21bf',
2120 'upharpoonright;': '\u21be',
2122 'UpperLeftArrow;': '\u2196',
2123 'UpperRightArrow;': '\u2197',
2127 'Upsilon;': '\u03a5',
2128 'upsilon;': '\u03c5',
2130 'UpTeeArrow;': '\u21a5',
2131 'upuparrows;': '\u21c8',
2132 'urcorn;': '\u231d',
2133 'urcorner;': '\u231d',
2134 'urcrop;': '\u230e',
2138 'Uscr;': '\U0001d4b0',
2139 'uscr;': '\U0001d4ca',
2141 'Utilde;': '\u0168',
2142 'utilde;': '\u0169',
2150 'uwangle;': '\u29a7',
2151 'vangrt;': '\u299c',
2152 'varepsilon;': '\u03f5',
2153 'varkappa;': '\u03f0',
2154 'varnothing;': '\u2205',
2155 'varphi;': '\u03d5',
2157 'varpropto;': '\u221d',
2160 'varrho;': '\u03f1',
2161 'varsigma;': '\u03c2',
2162 'varsubsetneq;': '\u228a\ufe00',
2163 'varsubsetneqq;': '\u2acb\ufe00',
2164 'varsupsetneq;': '\u228b\ufe00',
2165 'varsupsetneqq;': '\u2acc\ufe00',
2166 'vartheta;': '\u03d1',
2167 'vartriangleleft;': '\u22b2',
2168 'vartriangleright;': '\u22b3',
2178 'Vdashl;': '\u2ae6',
2181 'veebar;': '\u22bb',
2183 'vellip;': '\u22ee',
2184 'Verbar;': '\u2016',
2188 'VerticalBar;': '\u2223',
2189 'VerticalLine;': '|',
2190 'VerticalSeparator;': '\u2758',
2191 'VerticalTilde;': '\u2240',
2192 'VeryThinSpace;': '\u200a',
2193 'Vfr;': '\U0001d519',
2194 'vfr;': '\U0001d533',
2196 'vnsub;': '\u2282\u20d2',
2197 'vnsup;': '\u2283\u20d2',
2198 'Vopf;': '\U0001d54d',
2199 'vopf;': '\U0001d567',
2202 'Vscr;': '\U0001d4b1',
2203 'vscr;': '\U0001d4cb',
2204 'vsubnE;': '\u2acb\ufe00',
2205 'vsubne;': '\u228a\ufe00',
2206 'vsupnE;': '\u2acc\ufe00',
2207 'vsupne;': '\u228b\ufe00',
2208 'Vvdash;': '\u22aa',
2209 'vzigzag;': '\u299a',
2212 'wedbar;': '\u2a5f',
2215 'wedgeq;': '\u2259',
2216 'weierp;': '\u2118',
2217 'Wfr;': '\U0001d51a',
2218 'wfr;': '\U0001d534',
2219 'Wopf;': '\U0001d54e',
2220 'wopf;': '\U0001d568',
2223 'wreath;': '\u2240',
2224 'Wscr;': '\U0001d4b2',
2225 'wscr;': '\U0001d4cc',
2230 'Xfr;': '\U0001d51b',
2231 'xfr;': '\U0001d535',
2241 'Xopf;': '\U0001d54f',
2242 'xopf;': '\U0001d569',
2243 'xoplus;': '\u2a01',
2244 'xotime;': '\u2a02',
2247 'Xscr;': '\U0001d4b3',
2248 'xscr;': '\U0001d4cd',
2249 'xsqcup;': '\u2a06',
2250 'xuplus;': '\u2a04',
2253 'xwedge;': '\u22c0',
2266 'Yfr;': '\U0001d51c',
2267 'yfr;': '\U0001d536',
2270 'Yopf;': '\U0001d550',
2271 'yopf;': '\U0001d56a',
2272 'Yscr;': '\U0001d4b4',
2273 'yscr;': '\U0001d4ce',
2279 'Zacute;': '\u0179',
2280 'zacute;': '\u017a',
2281 'Zcaron;': '\u017d',
2282 'zcaron;': '\u017e',
2287 'zeetrf;': '\u2128',
2288 'ZeroWidthSpace;': '\u200b',
2292 'zfr;': '\U0001d537',
2295 'zigrarr;': '\u21dd',
2297 'zopf;': '\U0001d56b',
2298 'Zscr;': '\U0001d4b5',
2299 'zscr;': '\U0001d4cf',
2305 import http
.client
as compat_http_client
2306 except ImportError: # Python 2
2307 import httplib
as compat_http_client
2310 from urllib
.error
import HTTPError
as compat_HTTPError
2311 except ImportError: # Python 2
2312 from urllib2
import HTTPError
as compat_HTTPError
2315 from urllib
.request
import urlretrieve
as compat_urlretrieve
2316 except ImportError: # Python 2
2317 from urllib
import urlretrieve
as compat_urlretrieve
2320 from html
.parser
import HTMLParser
as compat_HTMLParser
2321 except ImportError: # Python 2
2322 from HTMLParser
import HTMLParser
as compat_HTMLParser
2325 from subprocess
import DEVNULL
2326 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2328 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2331 import http
.server
as compat_http_server
2333 import BaseHTTPServer
as compat_http_server
2336 compat_str
= unicode # Python 2
2341 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2342 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2343 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2344 except ImportError: # Python 2
2345 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2346 else re
.compile('([\x00-\x7f]+)'))
2348 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2349 # implementations from cpython 3.4.3's stdlib. Python 2's version
2350 # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
2352 def compat_urllib_parse_unquote_to_bytes(string
):
2353 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2354 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2355 # unescaped non-ASCII characters, which URIs should not.
2357 # Is it a string-like object?
2360 if isinstance(string
, compat_str
):
2361 string
= string
.encode('utf-8')
2362 bits
= string
.split(b
'%')
2367 for item
in bits
[1:]:
2369 append(compat_urllib_parse
._hextochr
[item
[:2]])
2374 return b
''.join(res
)
2376 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2377 """Replace %xx escapes by their single-character equivalent. The optional
2378 encoding and errors parameters specify how to decode percent-encoded
2379 sequences into Unicode characters, as accepted by the bytes.decode()
2381 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2382 sequences are replaced by a placeholder character.
2384 unquote('abc%20def') -> 'abc def'.
2386 if '%' not in string
:
2389 if encoding
is None:
2393 bits
= _asciire
.split(string
)
2396 for i
in range(1, len(bits
), 2):
2397 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Unquote a form-encoded value, treating every '+' as a space.

    Plus signs in *string* are replaced by spaces first; the result is then
    passed to compat_urllib_parse_unquote() together with *encoding* and
    *errors*, and whatever that call returns is returned unchanged.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    return compat_urllib_parse_unquote(
        string.replace('+', ' '), encoding, errors)
2411 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2412 except ImportError: # Python 2
2413 # Python 2 will choke in urlencode on a mixture of byte and unicode strings.
2414 # Possible solutions are to either port it from Python 3 with all
2415 # its helpers or manually ensure the input query contains only byte strings.
2416 # We stick with the latter, thus recursively encoding the whole query.
2417 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2419 if isinstance(e
, dict):
2421 elif isinstance(e
, (list, tuple,)):
2422 list_e
= encode_list(e
)
2423 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2424 elif isinstance(e
, compat_str
):
2425 e
= e
.encode(encoding
)
2429 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2432 return [encode_elem(e
) for e
in l
]
2434 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2437 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2438 except ImportError: # Python < 3.4
2439 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2440 class compat_urllib_request_DataHandler(compat_urllib_request
.BaseHandler
):
2441 def data_open(self
, req
):
2442 # data URLs as specified in RFC 2397.
2444 # ignores POSTed data
2447 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2448 # mediatype := [ type "/" subtype ] *( ";" parameter )
2450 # parameter := attribute "=" value
2451 url
= req
.get_full_url()
2453 scheme
, data
= url
.split(':', 1)
2454 mediatype
, data
= data
.split(',', 1)
2456 # even base64 encoded data URLs might be quoted so unquote in any case:
2457 data
= compat_urllib_parse_unquote_to_bytes(data
)
2458 if mediatype
.endswith(';base64'):
2459 data
= binascii
.a2b_base64(data
)
2460 mediatype
= mediatype
[:-7]
2463 mediatype
= 'text/plain;charset=US-ASCII'
2465 headers
= email
.message_from_string(
2466 'Content-type: %s\nContent-length: %d\n' % (mediatype
, len(data
)))
2468 return compat_urllib_response
.addinfourl(io
.BytesIO(data
), headers
, url
)
2471 compat_basestring
= basestring
# Python 2
2473 compat_basestring
= str
2476 compat_chr
= unichr # Python 2
2481 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2482 except ImportError: # Python 2.6
2483 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2486 etree
= xml
.etree
.ElementTree
2489 class _TreeBuilder(etree
.TreeBuilder
):
2490 def doctype(self
, name
, pubid
, system
):
2493 if sys
.version_info
[0] >= 3:
def compat_etree_fromstring(text):
    """Parse *text* with etree.XML() using a parser that targets a new _TreeBuilder."""
    builder = _TreeBuilder()
    xml_parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=xml_parser)
2497 # python 2.x tries to encode unicode strings with ascii (see the
2498 # XMLParser._fixtext method)
2500 _etree_iter
= etree
.Element
.iter
2501 except AttributeError: # Python <=2.6
2502 def _etree_iter(root
):
2503 for el
in root
.findall('*'):
2505 for sub
in _etree_iter(el
):
2508 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2510 def _XML(text
, parser
=None):
2512 parser
= etree
.XMLParser(target
=_TreeBuilder())
2514 return parser
.close()
2516 def _element_factory(*args
, **kwargs
):
2517 el
= etree
.Element(*args
, **kwargs
)
2518 for k
, v
in el
.items():
2519 if isinstance(v
, bytes):
2520 el
.set(k
, v
.decode('utf-8'))
2523 def compat_etree_fromstring(text
):
2524 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2525 for el
in _etree_iter(doc
):
2526 if el
.text
is not None and isinstance(el
.text
, bytes):
2527 el
.text
= el
.text
.decode('utf-8')
2530 if sys
.version_info
< (2, 7):
2531 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2532 # .//node does not match if a node is a direct child of . !
2533 def compat_xpath(xpath
):
2534 if isinstance(xpath
, compat_str
):
2535 xpath
= xpath
.encode('ascii')
2538 compat_xpath
= lambda xpath
: xpath
2541 from urllib
.parse
import parse_qs
as compat_parse_qs
2542 except ImportError: # Python 2
2543 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2544 # Python 2's version is apparently totally broken
2546 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2547 encoding
='utf-8', errors
='replace'):
2548 qs
, _coerce_result
= qs
, compat_str
2549 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2551 for name_value
in pairs
:
2552 if not name_value
and not strict_parsing
:
2554 nv
= name_value
.split('=', 1)
2557 raise ValueError('bad query field: %r' % (name_value
,))
2558 # Handle case of a control-name with no equal sign
2559 if keep_blank_values
:
2563 if len(nv
[1]) or keep_blank_values
:
2564 name
= nv
[0].replace('+', ' ')
2565 name
= compat_urllib_parse_unquote(
2566 name
, encoding
=encoding
, errors
=errors
)
2567 name
= _coerce_result(name
)
2568 value
= nv
[1].replace('+', ' ')
2569 value
= compat_urllib_parse_unquote(
2570 value
, encoding
=encoding
, errors
=errors
)
2571 value
= _coerce_result(value
)
2572 r
.append((name
, value
))
2575 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
2576 encoding
='utf-8', errors
='replace'):
2578 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
2579 encoding
=encoding
, errors
=errors
)
2580 for name
, value
in pairs
:
2581 if name
in parsed_result
:
2582 parsed_result
[name
].append(value
)
2584 parsed_result
[name
] = [value
]
2585 return parsed_result
2588 from shlex
import quote
as compat_shlex_quote
2589 except ImportError: # Python < 3.3
2590 def compat_shlex_quote(s
):
2591 if re
.match(r
'^[-_\w./]+$', s
):
2594 return "'" + s
.replace("'", "'\"'\"'") + "'"
2597 if sys
.version_info
>= (2, 7, 3):
2598 compat_shlex_split
= shlex
.split
2600 # Working around shlex issue with unicode strings on some python 2
2601 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split(), UTF-8 encoding it first if it is a compat_str.

    *comments* and *posix* are forwarded positionally to shlex.split().
    """
    to_split = s.encode('utf-8') if isinstance(s, compat_str) else s
    return shlex.split(to_split, comments, posix)
2615 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2618 if sys
.version_info
>= (3, 0):
2619 compat_getenv
= os
.getenv
2620 compat_expanduser
= os
.path
.expanduser
2622 def compat_setenv(key
, value
, env
=os
.environ
):
2625 # Environment variables should be decoded with filesystem encoding.
2626 # Otherwise it will fail if any non-ASCII characters are present (see #3854 #3217 #2918)
2628 def compat_getenv(key
, default
=None):
2629 from .utils
import get_filesystem_encoding
2630 env
= os
.getenv(key
, default
)
2632 env
= env
.decode(get_filesystem_encoding())
2635 def compat_setenv(key
, value
, env
=os
.environ
):
2637 from .utils
import get_filesystem_encoding
2638 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2639 env
[encode(key
)] = encode(value
)
2641 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2642 # environment variables with filesystem encoding. We will work around this by
2643 # providing adjusted implementations.
2644 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2645 # for different platforms with correct environment variables decoding.
2647 if compat_os_name
== 'posix':
2648 def compat_expanduser(path
):
2649 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2651 if not path
.startswith('~'):
2653 i
= path
.find('/', 1)
2657 if 'HOME' not in os
.environ
:
2659 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2661 userhome
= compat_getenv('HOME')
2665 pwent
= pwd
.getpwnam(path
[1:i
])
2668 userhome
= pwent
.pw_dir
2669 userhome
= userhome
.rstrip('/')
2670 return (userhome
+ path
[i
:]) or '/'
2671 elif compat_os_name
== 'nt' or compat_os_name
== 'ce':
2672 def compat_expanduser(path
):
2673 """Expand ~ and ~user constructs.
2675 If user or $HOME is unknown, do nothing."""
2679 while i
< n
and path
[i
] not in '/\\':
2682 if 'HOME' in os
.environ
:
2683 userhome
= compat_getenv('HOME')
2684 elif 'USERPROFILE' in os
.environ
:
2685 userhome
= compat_getenv('USERPROFILE')
2686 elif 'HOMEPATH' not in os
.environ
:
2690 drive
= compat_getenv('HOMEDRIVE')
2693 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2696 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2698 return userhome
+ path
[i
:]
2700 compat_expanduser
= os
.path
.expanduser
2703 if sys
.version_info
< (3, 0):
def compat_print(s):
    """Print *s* encoded with the preferred encoding.

    Characters the encoding cannot represent are emitted as XML character
    references ('xmlcharrefreplace').
    """
    from .utils import preferredencoding
    encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
    print(encoded)
2708 def compat_print(s
):
2709 assert isinstance(s
, compat_str
)
2713 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass(), encoding a compat_str *prompt* first.

    A compat_str prompt is encoded with the preferred encoding; any other
    prompt is passed through untouched. Extra positional and keyword
    arguments are forwarded to getpass.getpass() as-is.
    """
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    from .utils import preferredencoding
    encoded_prompt = prompt.encode(preferredencoding())
    return getpass.getpass(encoded_prompt, *args, **kwargs)
2720 compat_getpass
= getpass
.getpass
2723 compat_input
= raw_input
2724 except NameError: # Python 3
2725 compat_input
= input
2727 # Python < 2.6.5 requires kwargs to be bytes
2731 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a new dict holding the items of *kwargs* with each key passed through bytes()."""
    converted = {}
    for key, value in kwargs.items():
        converted[bytes(key)] = value
    return converted
2736 compat_kwargs
= lambda kwargs
: kwargs
2739 if sys
.version_info
< (2, 7):
2740 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2741 host
, port
= address
2743 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2744 af
, socktype
, proto
, canonname
, sa
= res
2747 sock
= socket
.socket(af
, socktype
, proto
)
2748 sock
.settimeout(timeout
)
2750 sock
.bind(source_address
)
2753 except socket
.error
as _
:
2755 if sock
is not None:
2760 raise socket
.error('getaddrinfo returns an empty list')
2762 compat_socket_create_connection
= socket
.create_connection
2765 # Fix https://github.com/rg3/youtube-dl/issues/4223
2766 # See http://bugs.python.org/issue9161 for what is broken
2767 def workaround_optparse_bug9161():
2768 op
= optparse
.OptionParser()
2769 og
= optparse
.OptionGroup(op
, 'foo')
2773 real_add_option
= optparse
.OptionGroup
.add_option
2775 def _compat_add_option(self
, *args
, **kwargs
):
2777 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2779 bargs
= [enc(a
) for a
in args
]
2781 (k
, enc(v
)) for k
, v
in kwargs
.items())
2782 return real_add_option(self
, *bargs
, **bkwargs
)
2783 optparse
.OptionGroup
.add_option
= _compat_add_option
2785 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2786 compat_get_terminal_size
= shutil
.get_terminal_size
2788 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2790 def compat_get_terminal_size(fallback
=(80, 24)):
2791 columns
= compat_getenv('COLUMNS')
2793 columns
= int(columns
)
2796 lines
= compat_getenv('LINES')
2802 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2804 sp
= subprocess
.Popen(
2806 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2807 out
, err
= sp
.communicate()
2808 _lines
, _columns
= map(int, out
.split())
2810 _columns
, _lines
= _terminal_size(*fallback
)
2812 if columns
is None or columns
<= 0:
2814 if lines
is None or lines
<= 0:
2816 return _terminal_size(columns
, lines
)
2819 itertools
.count(start
=0, step
=1)
2820 compat_itertools_count
= itertools
.count
2821 except TypeError: # Python 2.6
2822 def compat_itertools_count(start
=0, step
=1):
2828 if sys
.version_info
>= (3, 0):
2829 from tokenize
import tokenize
as compat_tokenize_tokenize
2831 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2835 struct
.pack('!I', 0)
2837 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2838 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """Call struct.pack(), ASCII-encoding *spec* first when it is a compat_str.

    All remaining positional arguments are forwarded to struct.pack().
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def compat_struct_unpack(spec, *args):
    """Call struct.unpack(), ASCII-encoding *spec* first when it is a compat_str.

    All remaining positional arguments are forwarded to struct.unpack().
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(fmt, *args)
2849 compat_struct_pack
= struct
.pack
2850 compat_struct_unpack
= struct
.unpack
2854 'compat_HTMLParser',
2856 'compat_basestring',
2860 'compat_etree_fromstring',
2861 'compat_expanduser',
2862 'compat_get_terminal_size',
2865 'compat_html_entities',
2866 'compat_html_entities_html5',
2867 'compat_http_client',
2868 'compat_http_server',
2870 'compat_itertools_count',
2877 'compat_shlex_quote',
2878 'compat_shlex_split',
2879 'compat_socket_create_connection',
2881 'compat_struct_pack',
2882 'compat_struct_unpack',
2883 'compat_subprocess_get_DEVNULL',
2884 'compat_tokenize_tokenize',
2885 'compat_urllib_error',
2886 'compat_urllib_parse',
2887 'compat_urllib_parse_unquote',
2888 'compat_urllib_parse_unquote_plus',
2889 'compat_urllib_parse_unquote_to_bytes',
2890 'compat_urllib_parse_urlencode',
2891 'compat_urllib_parse_urlparse',
2892 'compat_urllib_request',
2893 'compat_urllib_request_DataHandler',
2894 'compat_urllib_response',
2896 'compat_urlretrieve',
2897 'compat_xml_parse_error',
2899 'workaround_optparse_bug9161',