2 from __future__
import unicode_literals
19 import xml
.etree
.ElementTree
# Bind the URL-opening module under one version-independent name:
# urllib.request where it exists, otherwise the Python 2 urllib2 module.
try:
    import urllib.request as compat_urllib_request
except ImportError:  # Python 2
    import urllib2 as compat_urllib_request
# Bind the URL error module under one version-independent name:
# urllib.error where it exists, otherwise the Python 2 urllib2 module.
try:
    import urllib.error as compat_urllib_error
except ImportError:  # Python 2
    import urllib2 as compat_urllib_error
# Bind the URL quoting/encoding module under one version-independent name:
# urllib.parse where it exists, otherwise the Python 2 urllib module.
try:
    import urllib.parse as compat_urllib_parse
except ImportError:  # Python 2
    import urllib as compat_urllib_parse
# Bind the urlparse() function itself under one version-independent name,
# taken from urllib.parse or, failing that, the Python 2 urlparse module.
try:
    from urllib.parse import urlparse as compat_urllib_parse_urlparse
except ImportError:  # Python 2
    from urlparse import urlparse as compat_urllib_parse_urlparse
# Bind the URL splitting/joining module under one version-independent name:
# urllib.parse where it exists, otherwise the Python 2 urlparse module.
try:
    import urllib.parse as compat_urlparse
except ImportError:  # Python 2
    import urlparse as compat_urlparse
# Bind the response-wrapper module under one version-independent name:
# urllib.response where it exists, otherwise the Python 2 urllib module.
try:
    import urllib.response as compat_urllib_response
except ImportError:  # Python 2
    import urllib as compat_urllib_response
# Bind the cookie jar module under one version-independent name:
# http.cookiejar where it exists, otherwise the Python 2 cookielib module.
try:
    import http.cookiejar as compat_cookiejar
except ImportError:  # Python 2
    import cookielib as compat_cookiejar
# Bind the cookie parsing module under one version-independent name:
# http.cookies where it exists, otherwise the Python 2 Cookie module.
try:
    import http.cookies as compat_cookies
except ImportError:  # Python 2
    import Cookie as compat_cookies
# Bind the HTML entity tables under one version-independent name:
# html.entities where it exists, otherwise the Python 2 htmlentitydefs module.
try:
    import html.entities as compat_html_entities
except ImportError:  # Python 2
    import htmlentitydefs as compat_html_entities
68 compat_html_entities_html5
= compat_html_entities
.html5
69 except AttributeError:
70 # Copied from CPython 3.5.1 html/entities.py
71 compat_html_entities_html5
= {
80 'acE;': '\u223e\u0333',
100 'alefsym;': '\u2135',
115 'andslope;': '\u2a58',
121 'angmsdaa;': '\u29a8',
122 'angmsdab;': '\u29a9',
123 'angmsdac;': '\u29aa',
124 'angmsdad;': '\u29ab',
125 'angmsdae;': '\u29ac',
126 'angmsdaf;': '\u29ad',
127 'angmsdag;': '\u29ae',
128 'angmsdah;': '\u29af',
130 'angrtvb;': '\u22be',
131 'angrtvbd;': '\u299d',
134 'angzarr;': '\u237c',
137 'Aopf;': '\U0001d538',
138 'aopf;': '\U0001d552',
145 'ApplyFunction;': '\u2061',
147 'approxeq;': '\u224a',
152 'Ascr;': '\U0001d49c',
153 'ascr;': '\U0001d4b6',
157 'asympeq;': '\u224d',
166 'awconint;': '\u2233',
168 'backcong;': '\u224c',
169 'backepsilon;': '\u03f6',
170 'backprime;': '\u2035',
171 'backsim;': '\u223d',
172 'backsimeq;': '\u22cd',
173 'Backslash;': '\u2216',
178 'barwedge;': '\u2305',
180 'bbrktbrk;': '\u23b6',
186 'Because;': '\u2235',
187 'because;': '\u2235',
188 'bemptyv;': '\u29b0',
191 'Bernoullis;': '\u212c',
195 'between;': '\u226c',
196 'Bfr;': '\U0001d505',
197 'bfr;': '\U0001d51f',
199 'bigcirc;': '\u25ef',
201 'bigodot;': '\u2a00',
202 'bigoplus;': '\u2a01',
203 'bigotimes;': '\u2a02',
204 'bigsqcup;': '\u2a06',
205 'bigstar;': '\u2605',
206 'bigtriangledown;': '\u25bd',
207 'bigtriangleup;': '\u25b3',
208 'biguplus;': '\u2a04',
210 'bigwedge;': '\u22c0',
212 'blacklozenge;': '\u29eb',
213 'blacksquare;': '\u25aa',
214 'blacktriangle;': '\u25b4',
215 'blacktriangledown;': '\u25be',
216 'blacktriangleleft;': '\u25c2',
217 'blacktriangleright;': '\u25b8',
224 'bnequiv;': '\u2261\u20e5',
227 'Bopf;': '\U0001d539',
228 'bopf;': '\U0001d553',
251 'boxminus;': '\u229f',
252 'boxplus;': '\u229e',
253 'boxtimes;': '\u22a0',
282 'bscr;': '\U0001d4b7',
288 'bsolhsub;': '\u27c8',
301 'capbrcup;': '\u2a49',
305 'CapitalDifferentialD;': '\u2145',
306 'caps;': '\u2229\ufe00',
309 'Cayleys;': '\u212d',
319 'Cconint;': '\u2230',
321 'ccupssm;': '\u2a50',
327 'cemptyv;': '\u29b2',
330 'CenterDot;': '\xb7',
331 'centerdot;': '\xb7',
333 'cfr;': '\U0001d520',
337 'checkmark;': '\u2713',
343 'circlearrowleft;': '\u21ba',
344 'circlearrowright;': '\u21bb',
345 'circledast;': '\u229b',
346 'circledcirc;': '\u229a',
347 'circleddash;': '\u229d',
348 'CircleDot;': '\u2299',
350 'circledS;': '\u24c8',
351 'CircleMinus;': '\u2296',
352 'CirclePlus;': '\u2295',
353 'CircleTimes;': '\u2297',
356 'cirfnint;': '\u2a10',
358 'cirscir;': '\u29c2',
359 'ClockwiseContourIntegral;': '\u2232',
360 'CloseCurlyDoubleQuote;': '\u201d',
361 'CloseCurlyQuote;': '\u2019',
363 'clubsuit;': '\u2663',
368 'coloneq;': '\u2254',
373 'complement;': '\u2201',
374 'complexes;': '\u2102',
376 'congdot;': '\u2a6d',
377 'Congruent;': '\u2261',
380 'ContourIntegral;': '\u222e',
382 'copf;': '\U0001d554',
384 'Coproduct;': '\u2210',
390 'CounterClockwiseContourIntegral;': '\u2233',
394 'Cscr;': '\U0001d49e',
395 'cscr;': '\U0001d4b8',
401 'cudarrl;': '\u2938',
402 'cudarrr;': '\u2935',
406 'cularrp;': '\u293d',
409 'cupbrcap;': '\u2a48',
415 'cups;': '\u222a\ufe00',
417 'curarrm;': '\u293c',
418 'curlyeqprec;': '\u22de',
419 'curlyeqsucc;': '\u22df',
420 'curlyvee;': '\u22ce',
421 'curlywedge;': '\u22cf',
424 'curvearrowleft;': '\u21b6',
425 'curvearrowright;': '\u21b7',
428 'cwconint;': '\u2232',
440 'dbkarow;': '\u290f',
448 'ddagger;': '\u2021',
450 'DDotrahd;': '\u2911',
451 'ddotseq;': '\u2a77',
457 'demptyv;': '\u29b1',
459 'Dfr;': '\U0001d507',
460 'dfr;': '\U0001d521',
464 'DiacriticalAcute;': '\xb4',
465 'DiacriticalDot;': '\u02d9',
466 'DiacriticalDoubleAcute;': '\u02dd',
467 'DiacriticalGrave;': '`',
468 'DiacriticalTilde;': '\u02dc',
470 'Diamond;': '\u22c4',
471 'diamond;': '\u22c4',
472 'diamondsuit;': '\u2666',
475 'DifferentialD;': '\u2146',
476 'digamma;': '\u03dd',
481 'divideontimes;': '\u22c7',
488 'Dopf;': '\U0001d53b',
489 'dopf;': '\U0001d555',
494 'doteqdot;': '\u2251',
495 'DotEqual;': '\u2250',
496 'dotminus;': '\u2238',
497 'dotplus;': '\u2214',
498 'dotsquare;': '\u22a1',
499 'doublebarwedge;': '\u2306',
500 'DoubleContourIntegral;': '\u222f',
501 'DoubleDot;': '\xa8',
502 'DoubleDownArrow;': '\u21d3',
503 'DoubleLeftArrow;': '\u21d0',
504 'DoubleLeftRightArrow;': '\u21d4',
505 'DoubleLeftTee;': '\u2ae4',
506 'DoubleLongLeftArrow;': '\u27f8',
507 'DoubleLongLeftRightArrow;': '\u27fa',
508 'DoubleLongRightArrow;': '\u27f9',
509 'DoubleRightArrow;': '\u21d2',
510 'DoubleRightTee;': '\u22a8',
511 'DoubleUpArrow;': '\u21d1',
512 'DoubleUpDownArrow;': '\u21d5',
513 'DoubleVerticalBar;': '\u2225',
514 'DownArrow;': '\u2193',
515 'Downarrow;': '\u21d3',
516 'downarrow;': '\u2193',
517 'DownArrowBar;': '\u2913',
518 'DownArrowUpArrow;': '\u21f5',
519 'DownBreve;': '\u0311',
520 'downdownarrows;': '\u21ca',
521 'downharpoonleft;': '\u21c3',
522 'downharpoonright;': '\u21c2',
523 'DownLeftRightVector;': '\u2950',
524 'DownLeftTeeVector;': '\u295e',
525 'DownLeftVector;': '\u21bd',
526 'DownLeftVectorBar;': '\u2956',
527 'DownRightTeeVector;': '\u295f',
528 'DownRightVector;': '\u21c1',
529 'DownRightVectorBar;': '\u2957',
530 'DownTee;': '\u22a4',
531 'DownTeeArrow;': '\u21a7',
532 'drbkarow;': '\u2910',
535 'Dscr;': '\U0001d49f',
536 'dscr;': '\U0001d4b9',
547 'dwangle;': '\u29a6',
550 'dzigrarr;': '\u27ff',
572 'Efr;': '\U0001d508',
573 'efr;': '\U0001d522',
582 'Element;': '\u2208',
583 'elinters;': '\u23e7',
590 'emptyset;': '\u2205',
591 'EmptySmallSquare;': '\u25fb',
593 'EmptyVerySmallSquare;': '\u25ab',
602 'Eopf;': '\U0001d53c',
603 'eopf;': '\U0001d556',
608 'Epsilon;': '\u0395',
609 'epsilon;': '\u03b5',
612 'eqcolon;': '\u2255',
614 'eqslantgtr;': '\u2a96',
615 'eqslantless;': '\u2a95',
618 'EqualTilde;': '\u2242',
620 'Equilibrium;': '\u21cc',
622 'equivDD;': '\u2a78',
623 'eqvparsl;': '\u29e5',
645 'expectation;': '\u2130',
646 'ExponentialE;': '\u2147',
647 'exponentiale;': '\u2147',
648 'fallingdotseq;': '\u2252',
655 'Ffr;': '\U0001d509',
656 'ffr;': '\U0001d523',
658 'FilledSmallSquare;': '\u25fc',
659 'FilledVerySmallSquare;': '\u25aa',
665 'Fopf;': '\U0001d53d',
666 'fopf;': '\U0001d557',
671 'Fouriertrf;': '\u2131',
672 'fpartint;': '\u2a0d',
694 'fscr;': '\U0001d4bb',
716 'geqslant;': '\u2a7e',
720 'gesdoto;': '\u2a82',
721 'gesdotol;': '\u2a84',
722 'gesl;': '\u22db\ufe00',
724 'Gfr;': '\U0001d50a',
725 'gfr;': '\U0001d524',
737 'gnapprox;': '\u2a8a',
743 'Gopf;': '\U0001d53e',
744 'gopf;': '\U0001d558',
746 'GreaterEqual;': '\u2265',
747 'GreaterEqualLess;': '\u22db',
748 'GreaterFullEqual;': '\u2267',
749 'GreaterGreater;': '\u2aa2',
750 'GreaterLess;': '\u2277',
751 'GreaterSlantEqual;': '\u2a7e',
752 'GreaterTilde;': '\u2273',
753 'Gscr;': '\U0001d4a2',
767 'gtquest;': '\u2a7c',
768 'gtrapprox;': '\u2a86',
771 'gtreqless;': '\u22db',
772 'gtreqqless;': '\u2a8c',
773 'gtrless;': '\u2277',
775 'gvertneqq;': '\u2269\ufe00',
776 'gvnE;': '\u2269\ufe00',
785 'harrcir;': '\u2948',
792 'heartsuit;': '\u2665',
796 'hfr;': '\U0001d525',
797 'HilbertSpace;': '\u210b',
798 'hksearow;': '\u2925',
799 'hkswarow;': '\u2926',
802 'hookleftarrow;': '\u21a9',
803 'hookrightarrow;': '\u21aa',
805 'hopf;': '\U0001d559',
807 'HorizontalLine;': '\u2500',
809 'hscr;': '\U0001d4bd',
813 'HumpDownHump;': '\u224e',
814 'HumpEqual;': '\u224f',
835 'ifr;': '\U0001d526',
851 'ImaginaryI;': '\u2148',
852 'imagline;': '\u2110',
853 'imagpart;': '\u2111',
857 'Implies;': '\u21d2',
861 'infintie;': '\u29dd',
866 'integers;': '\u2124',
867 'Integral;': '\u222b',
868 'intercal;': '\u22ba',
869 'Intersection;': '\u22c2',
870 'intlarhk;': '\u2a17',
871 'intprod;': '\u2a3c',
872 'InvisibleComma;': '\u2063',
873 'InvisibleTimes;': '\u2062',
878 'Iopf;': '\U0001d540',
879 'iopf;': '\U0001d55a',
886 'iscr;': '\U0001d4be',
888 'isindot;': '\u22f5',
906 'Jfr;': '\U0001d50d',
907 'jfr;': '\U0001d527',
909 'Jopf;': '\U0001d541',
910 'jopf;': '\U0001d55b',
911 'Jscr;': '\U0001d4a5',
912 'jscr;': '\U0001d4bf',
924 'Kfr;': '\U0001d50e',
925 'kfr;': '\U0001d528',
931 'Kopf;': '\U0001d542',
932 'kopf;': '\U0001d55c',
933 'Kscr;': '\U0001d4a6',
934 'kscr;': '\U0001d4c0',
938 'laemptyv;': '\u29b4',
947 'Laplacetrf;': '\u2112',
954 'larrbfs;': '\u291f',
959 'larrsim;': '\u2973',
965 'lates;': '\u2aad\ufe00',
972 'lbrksld;': '\u298f',
973 'lbrkslu;': '\u298d',
985 'ldrdhar;': '\u2967',
986 'ldrushar;': '\u294b',
990 'LeftAngleBracket;': '\u27e8',
991 'LeftArrow;': '\u2190',
992 'Leftarrow;': '\u21d0',
993 'leftarrow;': '\u2190',
994 'LeftArrowBar;': '\u21e4',
995 'LeftArrowRightArrow;': '\u21c6',
996 'leftarrowtail;': '\u21a2',
997 'LeftCeiling;': '\u2308',
998 'LeftDoubleBracket;': '\u27e6',
999 'LeftDownTeeVector;': '\u2961',
1000 'LeftDownVector;': '\u21c3',
1001 'LeftDownVectorBar;': '\u2959',
1002 'LeftFloor;': '\u230a',
1003 'leftharpoondown;': '\u21bd',
1004 'leftharpoonup;': '\u21bc',
1005 'leftleftarrows;': '\u21c7',
1006 'LeftRightArrow;': '\u2194',
1007 'Leftrightarrow;': '\u21d4',
1008 'leftrightarrow;': '\u2194',
1009 'leftrightarrows;': '\u21c6',
1010 'leftrightharpoons;': '\u21cb',
1011 'leftrightsquigarrow;': '\u21ad',
1012 'LeftRightVector;': '\u294e',
1013 'LeftTee;': '\u22a3',
1014 'LeftTeeArrow;': '\u21a4',
1015 'LeftTeeVector;': '\u295a',
1016 'leftthreetimes;': '\u22cb',
1017 'LeftTriangle;': '\u22b2',
1018 'LeftTriangleBar;': '\u29cf',
1019 'LeftTriangleEqual;': '\u22b4',
1020 'LeftUpDownVector;': '\u2951',
1021 'LeftUpTeeVector;': '\u2960',
1022 'LeftUpVector;': '\u21bf',
1023 'LeftUpVectorBar;': '\u2958',
1024 'LeftVector;': '\u21bc',
1025 'LeftVectorBar;': '\u2952',
1030 'leqslant;': '\u2a7d',
1033 'lesdot;': '\u2a7f',
1034 'lesdoto;': '\u2a81',
1035 'lesdotor;': '\u2a83',
1036 'lesg;': '\u22da\ufe00',
1037 'lesges;': '\u2a93',
1038 'lessapprox;': '\u2a85',
1039 'lessdot;': '\u22d6',
1040 'lesseqgtr;': '\u22da',
1041 'lesseqqgtr;': '\u2a8b',
1042 'LessEqualGreater;': '\u22da',
1043 'LessFullEqual;': '\u2266',
1044 'LessGreater;': '\u2276',
1045 'lessgtr;': '\u2276',
1046 'LessLess;': '\u2aa1',
1047 'lesssim;': '\u2272',
1048 'LessSlantEqual;': '\u2a7d',
1049 'LessTilde;': '\u2272',
1050 'lfisht;': '\u297c',
1051 'lfloor;': '\u230a',
1052 'Lfr;': '\U0001d50f',
1053 'lfr;': '\U0001d529',
1059 'lharul;': '\u296a',
1066 'llcorner;': '\u231e',
1067 'Lleftarrow;': '\u21da',
1068 'llhard;': '\u296b',
1070 'Lmidot;': '\u013f',
1071 'lmidot;': '\u0140',
1072 'lmoust;': '\u23b0',
1073 'lmoustache;': '\u23b0',
1075 'lnapprox;': '\u2a89',
1084 'LongLeftArrow;': '\u27f5',
1085 'Longleftarrow;': '\u27f8',
1086 'longleftarrow;': '\u27f5',
1087 'LongLeftRightArrow;': '\u27f7',
1088 'Longleftrightarrow;': '\u27fa',
1089 'longleftrightarrow;': '\u27f7',
1090 'longmapsto;': '\u27fc',
1091 'LongRightArrow;': '\u27f6',
1092 'Longrightarrow;': '\u27f9',
1093 'longrightarrow;': '\u27f6',
1094 'looparrowleft;': '\u21ab',
1095 'looparrowright;': '\u21ac',
1097 'Lopf;': '\U0001d543',
1098 'lopf;': '\U0001d55d',
1099 'loplus;': '\u2a2d',
1100 'lotimes;': '\u2a34',
1101 'lowast;': '\u2217',
1103 'LowerLeftArrow;': '\u2199',
1104 'LowerRightArrow;': '\u2198',
1106 'lozenge;': '\u25ca',
1109 'lparlt;': '\u2993',
1111 'lrcorner;': '\u231f',
1113 'lrhard;': '\u296d',
1116 'lsaquo;': '\u2039',
1118 'lscr;': '\U0001d4c1',
1126 'lsquor;': '\u201a',
1127 'Lstrok;': '\u0141',
1128 'lstrok;': '\u0142',
1137 'lthree;': '\u22cb',
1138 'ltimes;': '\u22c9',
1139 'ltlarr;': '\u2976',
1140 'ltquest;': '\u2a7b',
1144 'ltrPar;': '\u2996',
1145 'lurdshar;': '\u294a',
1146 'luruhar;': '\u2966',
1147 'lvertneqq;': '\u2268\ufe00',
1148 'lvnE;': '\u2268\ufe00',
1153 'maltese;': '\u2720',
1156 'mapsto;': '\u21a6',
1157 'mapstodown;': '\u21a7',
1158 'mapstoleft;': '\u21a4',
1159 'mapstoup;': '\u21a5',
1160 'marker;': '\u25ae',
1161 'mcomma;': '\u2a29',
1166 'measuredangle;': '\u2221',
1167 'MediumSpace;': '\u205f',
1168 'Mellintrf;': '\u2133',
1169 'Mfr;': '\U0001d510',
1170 'mfr;': '\U0001d52a',
1176 'midcir;': '\u2af0',
1180 'minusb;': '\u229f',
1181 'minusd;': '\u2238',
1182 'minusdu;': '\u2a2a',
1183 'MinusPlus;': '\u2213',
1186 'mnplus;': '\u2213',
1187 'models;': '\u22a7',
1188 'Mopf;': '\U0001d544',
1189 'mopf;': '\U0001d55e',
1192 'mscr;': '\U0001d4c2',
1193 'mstpos;': '\u223e',
1196 'multimap;': '\u22b8',
1199 'Nacute;': '\u0143',
1200 'nacute;': '\u0144',
1201 'nang;': '\u2220\u20d2',
1203 'napE;': '\u2a70\u0338',
1204 'napid;': '\u224b\u0338',
1206 'napprox;': '\u2249',
1208 'natural;': '\u266e',
1209 'naturals;': '\u2115',
1212 'nbump;': '\u224e\u0338',
1213 'nbumpe;': '\u224f\u0338',
1215 'Ncaron;': '\u0147',
1216 'ncaron;': '\u0148',
1217 'Ncedil;': '\u0145',
1218 'ncedil;': '\u0146',
1220 'ncongdot;': '\u2a6d\u0338',
1226 'nearhk;': '\u2924',
1229 'nearrow;': '\u2197',
1230 'nedot;': '\u2250\u0338',
1231 'NegativeMediumSpace;': '\u200b',
1232 'NegativeThickSpace;': '\u200b',
1233 'NegativeThinSpace;': '\u200b',
1234 'NegativeVeryThinSpace;': '\u200b',
1235 'nequiv;': '\u2262',
1236 'nesear;': '\u2928',
1237 'nesim;': '\u2242\u0338',
1238 'NestedGreaterGreater;': '\u226b',
1239 'NestedLessLess;': '\u226a',
1241 'nexist;': '\u2204',
1242 'nexists;': '\u2204',
1243 'Nfr;': '\U0001d511',
1244 'nfr;': '\U0001d52b',
1245 'ngE;': '\u2267\u0338',
1248 'ngeqq;': '\u2267\u0338',
1249 'ngeqslant;': '\u2a7e\u0338',
1250 'nges;': '\u2a7e\u0338',
1251 'nGg;': '\u22d9\u0338',
1253 'nGt;': '\u226b\u20d2',
1256 'nGtv;': '\u226b\u0338',
1269 'nlE;': '\u2266\u0338',
1271 'nLeftarrow;': '\u21cd',
1272 'nleftarrow;': '\u219a',
1273 'nLeftrightarrow;': '\u21ce',
1274 'nleftrightarrow;': '\u21ae',
1276 'nleqq;': '\u2266\u0338',
1277 'nleqslant;': '\u2a7d\u0338',
1278 'nles;': '\u2a7d\u0338',
1280 'nLl;': '\u22d8\u0338',
1282 'nLt;': '\u226a\u20d2',
1285 'nltrie;': '\u22ec',
1286 'nLtv;': '\u226a\u0338',
1288 'NoBreak;': '\u2060',
1289 'NonBreakingSpace;': '\xa0',
1291 'nopf;': '\U0001d55f',
1295 'NotCongruent;': '\u2262',
1296 'NotCupCap;': '\u226d',
1297 'NotDoubleVerticalBar;': '\u2226',
1298 'NotElement;': '\u2209',
1299 'NotEqual;': '\u2260',
1300 'NotEqualTilde;': '\u2242\u0338',
1301 'NotExists;': '\u2204',
1302 'NotGreater;': '\u226f',
1303 'NotGreaterEqual;': '\u2271',
1304 'NotGreaterFullEqual;': '\u2267\u0338',
1305 'NotGreaterGreater;': '\u226b\u0338',
1306 'NotGreaterLess;': '\u2279',
1307 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1308 'NotGreaterTilde;': '\u2275',
1309 'NotHumpDownHump;': '\u224e\u0338',
1310 'NotHumpEqual;': '\u224f\u0338',
1312 'notindot;': '\u22f5\u0338',
1313 'notinE;': '\u22f9\u0338',
1314 'notinva;': '\u2209',
1315 'notinvb;': '\u22f7',
1316 'notinvc;': '\u22f6',
1317 'NotLeftTriangle;': '\u22ea',
1318 'NotLeftTriangleBar;': '\u29cf\u0338',
1319 'NotLeftTriangleEqual;': '\u22ec',
1320 'NotLess;': '\u226e',
1321 'NotLessEqual;': '\u2270',
1322 'NotLessGreater;': '\u2278',
1323 'NotLessLess;': '\u226a\u0338',
1324 'NotLessSlantEqual;': '\u2a7d\u0338',
1325 'NotLessTilde;': '\u2274',
1326 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1327 'NotNestedLessLess;': '\u2aa1\u0338',
1329 'notniva;': '\u220c',
1330 'notnivb;': '\u22fe',
1331 'notnivc;': '\u22fd',
1332 'NotPrecedes;': '\u2280',
1333 'NotPrecedesEqual;': '\u2aaf\u0338',
1334 'NotPrecedesSlantEqual;': '\u22e0',
1335 'NotReverseElement;': '\u220c',
1336 'NotRightTriangle;': '\u22eb',
1337 'NotRightTriangleBar;': '\u29d0\u0338',
1338 'NotRightTriangleEqual;': '\u22ed',
1339 'NotSquareSubset;': '\u228f\u0338',
1340 'NotSquareSubsetEqual;': '\u22e2',
1341 'NotSquareSuperset;': '\u2290\u0338',
1342 'NotSquareSupersetEqual;': '\u22e3',
1343 'NotSubset;': '\u2282\u20d2',
1344 'NotSubsetEqual;': '\u2288',
1345 'NotSucceeds;': '\u2281',
1346 'NotSucceedsEqual;': '\u2ab0\u0338',
1347 'NotSucceedsSlantEqual;': '\u22e1',
1348 'NotSucceedsTilde;': '\u227f\u0338',
1349 'NotSuperset;': '\u2283\u20d2',
1350 'NotSupersetEqual;': '\u2289',
1351 'NotTilde;': '\u2241',
1352 'NotTildeEqual;': '\u2244',
1353 'NotTildeFullEqual;': '\u2247',
1354 'NotTildeTilde;': '\u2249',
1355 'NotVerticalBar;': '\u2224',
1357 'nparallel;': '\u2226',
1358 'nparsl;': '\u2afd\u20e5',
1359 'npart;': '\u2202\u0338',
1360 'npolint;': '\u2a14',
1362 'nprcue;': '\u22e0',
1363 'npre;': '\u2aaf\u0338',
1365 'npreceq;': '\u2aaf\u0338',
1368 'nrarrc;': '\u2933\u0338',
1369 'nrarrw;': '\u219d\u0338',
1370 'nRightarrow;': '\u21cf',
1371 'nrightarrow;': '\u219b',
1373 'nrtrie;': '\u22ed',
1375 'nsccue;': '\u22e1',
1376 'nsce;': '\u2ab0\u0338',
1377 'Nscr;': '\U0001d4a9',
1378 'nscr;': '\U0001d4c3',
1379 'nshortmid;': '\u2224',
1380 'nshortparallel;': '\u2226',
1383 'nsimeq;': '\u2244',
1386 'nsqsube;': '\u22e2',
1387 'nsqsupe;': '\u22e3',
1389 'nsubE;': '\u2ac5\u0338',
1391 'nsubset;': '\u2282\u20d2',
1392 'nsubseteq;': '\u2288',
1393 'nsubseteqq;': '\u2ac5\u0338',
1395 'nsucceq;': '\u2ab0\u0338',
1397 'nsupE;': '\u2ac6\u0338',
1399 'nsupset;': '\u2283\u20d2',
1400 'nsupseteq;': '\u2289',
1401 'nsupseteqq;': '\u2ac6\u0338',
1408 'ntriangleleft;': '\u22ea',
1409 'ntrianglelefteq;': '\u22ec',
1410 'ntriangleright;': '\u22eb',
1411 'ntrianglerighteq;': '\u22ed',
1415 'numero;': '\u2116',
1417 'nvap;': '\u224d\u20d2',
1418 'nVDash;': '\u22af',
1419 'nVdash;': '\u22ae',
1420 'nvDash;': '\u22ad',
1421 'nvdash;': '\u22ac',
1422 'nvge;': '\u2265\u20d2',
1424 'nvHarr;': '\u2904',
1425 'nvinfin;': '\u29de',
1426 'nvlArr;': '\u2902',
1427 'nvle;': '\u2264\u20d2',
1429 'nvltrie;': '\u22b4\u20d2',
1430 'nvrArr;': '\u2903',
1431 'nvrtrie;': '\u22b5\u20d2',
1432 'nvsim;': '\u223c\u20d2',
1433 'nwarhk;': '\u2923',
1436 'nwarrow;': '\u2196',
1437 'nwnear;': '\u2927',
1451 'Odblac;': '\u0150',
1452 'odblac;': '\u0151',
1455 'odsold;': '\u29bc',
1459 'Ofr;': '\U0001d512',
1460 'ofr;': '\U0001d52c',
1472 'olcross;': '\u29bb',
1479 'Omicron;': '\u039f',
1480 'omicron;': '\u03bf',
1482 'ominus;': '\u2296',
1483 'Oopf;': '\U0001d546',
1484 'oopf;': '\U0001d560',
1486 'OpenCurlyDoubleQuote;': '\u201c',
1487 'OpenCurlyQuote;': '\u2018',
1495 'orderof;': '\u2134',
1500 'origof;': '\u22b6',
1502 'orslope;': '\u2a57',
1505 'Oscr;': '\U0001d4aa',
1516 'Otimes;': '\u2a37',
1517 'otimes;': '\u2297',
1518 'otimesas;': '\u2a36',
1524 'OverBar;': '\u203e',
1525 'OverBrace;': '\u23de',
1526 'OverBracket;': '\u23b4',
1527 'OverParenthesis;': '\u23dc',
1531 'parallel;': '\u2225',
1532 'parsim;': '\u2af3',
1535 'PartialD;': '\u2202',
1540 'permil;': '\u2030',
1542 'pertenk;': '\u2031',
1543 'Pfr;': '\U0001d513',
1544 'pfr;': '\U0001d52d',
1548 'phmmat;': '\u2133',
1552 'pitchfork;': '\u22d4',
1554 'planck;': '\u210f',
1555 'planckh;': '\u210e',
1556 'plankv;': '\u210f',
1558 'plusacir;': '\u2a23',
1560 'pluscir;': '\u2a22',
1561 'plusdo;': '\u2214',
1562 'plusdu;': '\u2a25',
1564 'PlusMinus;': '\xb1',
1567 'plussim;': '\u2a26',
1568 'plustwo;': '\u2a27',
1570 'Poincareplane;': '\u210c',
1571 'pointint;': '\u2a15',
1573 'popf;': '\U0001d561',
1583 'precapprox;': '\u2ab7',
1584 'preccurlyeq;': '\u227c',
1585 'Precedes;': '\u227a',
1586 'PrecedesEqual;': '\u2aaf',
1587 'PrecedesSlantEqual;': '\u227c',
1588 'PrecedesTilde;': '\u227e',
1589 'preceq;': '\u2aaf',
1590 'precnapprox;': '\u2ab9',
1591 'precneqq;': '\u2ab5',
1592 'precnsim;': '\u22e8',
1593 'precsim;': '\u227e',
1596 'primes;': '\u2119',
1599 'prnsim;': '\u22e8',
1601 'Product;': '\u220f',
1602 'profalar;': '\u232e',
1603 'profline;': '\u2312',
1604 'profsurf;': '\u2313',
1606 'Proportion;': '\u2237',
1607 'Proportional;': '\u221d',
1608 'propto;': '\u221d',
1610 'prurel;': '\u22b0',
1611 'Pscr;': '\U0001d4ab',
1612 'pscr;': '\U0001d4c5',
1615 'puncsp;': '\u2008',
1616 'Qfr;': '\U0001d514',
1617 'qfr;': '\U0001d52e',
1620 'qopf;': '\U0001d562',
1621 'qprime;': '\u2057',
1622 'Qscr;': '\U0001d4ac',
1623 'qscr;': '\U0001d4c6',
1624 'quaternions;': '\u210d',
1625 'quatint;': '\u2a16',
1627 'questeq;': '\u225f',
1633 'race;': '\u223d\u0331',
1634 'Racute;': '\u0154',
1635 'racute;': '\u0155',
1637 'raemptyv;': '\u29b3',
1642 'rangle;': '\u27e9',
1648 'rarrap;': '\u2975',
1650 'rarrbfs;': '\u2920',
1652 'rarrfs;': '\u291e',
1653 'rarrhk;': '\u21aa',
1654 'rarrlp;': '\u21ac',
1655 'rarrpl;': '\u2945',
1656 'rarrsim;': '\u2974',
1657 'Rarrtl;': '\u2916',
1658 'rarrtl;': '\u21a3',
1660 'rAtail;': '\u291c',
1661 'ratail;': '\u291a',
1663 'rationals;': '\u211a',
1671 'rbrksld;': '\u298e',
1672 'rbrkslu;': '\u2990',
1673 'Rcaron;': '\u0158',
1674 'rcaron;': '\u0159',
1675 'Rcedil;': '\u0156',
1676 'rcedil;': '\u0157',
1682 'rdldhar;': '\u2969',
1684 'rdquor;': '\u201d',
1688 'realine;': '\u211b',
1689 'realpart;': '\u211c',
1696 'ReverseElement;': '\u220b',
1697 'ReverseEquilibrium;': '\u21cb',
1698 'ReverseUpEquilibrium;': '\u296f',
1699 'rfisht;': '\u297d',
1700 'rfloor;': '\u230b',
1702 'rfr;': '\U0001d52f',
1706 'rharul;': '\u296c',
1710 'RightAngleBracket;': '\u27e9',
1711 'RightArrow;': '\u2192',
1712 'Rightarrow;': '\u21d2',
1713 'rightarrow;': '\u2192',
1714 'RightArrowBar;': '\u21e5',
1715 'RightArrowLeftArrow;': '\u21c4',
1716 'rightarrowtail;': '\u21a3',
1717 'RightCeiling;': '\u2309',
1718 'RightDoubleBracket;': '\u27e7',
1719 'RightDownTeeVector;': '\u295d',
1720 'RightDownVector;': '\u21c2',
1721 'RightDownVectorBar;': '\u2955',
1722 'RightFloor;': '\u230b',
1723 'rightharpoondown;': '\u21c1',
1724 'rightharpoonup;': '\u21c0',
1725 'rightleftarrows;': '\u21c4',
1726 'rightleftharpoons;': '\u21cc',
1727 'rightrightarrows;': '\u21c9',
1728 'rightsquigarrow;': '\u219d',
1729 'RightTee;': '\u22a2',
1730 'RightTeeArrow;': '\u21a6',
1731 'RightTeeVector;': '\u295b',
1732 'rightthreetimes;': '\u22cc',
1733 'RightTriangle;': '\u22b3',
1734 'RightTriangleBar;': '\u29d0',
1735 'RightTriangleEqual;': '\u22b5',
1736 'RightUpDownVector;': '\u294f',
1737 'RightUpTeeVector;': '\u295c',
1738 'RightUpVector;': '\u21be',
1739 'RightUpVectorBar;': '\u2954',
1740 'RightVector;': '\u21c0',
1741 'RightVectorBar;': '\u2953',
1743 'risingdotseq;': '\u2253',
1747 'rmoust;': '\u23b1',
1748 'rmoustache;': '\u23b1',
1755 'ropf;': '\U0001d563',
1756 'roplus;': '\u2a2e',
1757 'rotimes;': '\u2a35',
1758 'RoundImplies;': '\u2970',
1760 'rpargt;': '\u2994',
1761 'rppolint;': '\u2a12',
1763 'Rrightarrow;': '\u21db',
1764 'rsaquo;': '\u203a',
1766 'rscr;': '\U0001d4c7',
1771 'rsquor;': '\u2019',
1772 'rthree;': '\u22cc',
1773 'rtimes;': '\u22ca',
1777 'rtriltri;': '\u29ce',
1778 'RuleDelayed;': '\u29f4',
1779 'ruluhar;': '\u2968',
1781 'Sacute;': '\u015a',
1782 'sacute;': '\u015b',
1787 'Scaron;': '\u0160',
1788 'scaron;': '\u0161',
1792 'Scedil;': '\u015e',
1793 'scedil;': '\u015f',
1798 'scnsim;': '\u22e9',
1799 'scpolint;': '\u2a13',
1806 'searhk;': '\u2925',
1809 'searrow;': '\u2198',
1813 'seswar;': '\u2929',
1814 'setminus;': '\u2216',
1817 'Sfr;': '\U0001d516',
1818 'sfr;': '\U0001d530',
1819 'sfrown;': '\u2322',
1821 'SHCHcy;': '\u0429',
1822 'shchcy;': '\u0449',
1825 'ShortDownArrow;': '\u2193',
1826 'ShortLeftArrow;': '\u2190',
1827 'shortmid;': '\u2223',
1828 'shortparallel;': '\u2225',
1829 'ShortRightArrow;': '\u2192',
1830 'ShortUpArrow;': '\u2191',
1835 'sigmaf;': '\u03c2',
1836 'sigmav;': '\u03c2',
1838 'simdot;': '\u2a6a',
1846 'simplus;': '\u2a24',
1847 'simrarr;': '\u2972',
1849 'SmallCircle;': '\u2218',
1850 'smallsetminus;': '\u2216',
1851 'smashp;': '\u2a33',
1852 'smeparsl;': '\u29e4',
1857 'smtes;': '\u2aac\ufe00',
1858 'SOFTcy;': '\u042c',
1859 'softcy;': '\u044c',
1862 'solbar;': '\u233f',
1863 'Sopf;': '\U0001d54a',
1864 'sopf;': '\U0001d564',
1865 'spades;': '\u2660',
1866 'spadesuit;': '\u2660',
1869 'sqcaps;': '\u2293\ufe00',
1871 'sqcups;': '\u2294\ufe00',
1874 'sqsube;': '\u2291',
1875 'sqsubset;': '\u228f',
1876 'sqsubseteq;': '\u2291',
1878 'sqsupe;': '\u2292',
1879 'sqsupset;': '\u2290',
1880 'sqsupseteq;': '\u2292',
1882 'Square;': '\u25a1',
1883 'square;': '\u25a1',
1884 'SquareIntersection;': '\u2293',
1885 'SquareSubset;': '\u228f',
1886 'SquareSubsetEqual;': '\u2291',
1887 'SquareSuperset;': '\u2290',
1888 'SquareSupersetEqual;': '\u2292',
1889 'SquareUnion;': '\u2294',
1890 'squarf;': '\u25aa',
1893 'Sscr;': '\U0001d4ae',
1894 'sscr;': '\U0001d4c8',
1895 'ssetmn;': '\u2216',
1896 'ssmile;': '\u2323',
1897 'sstarf;': '\u22c6',
1901 'straightepsilon;': '\u03f5',
1902 'straightphi;': '\u03d5',
1906 'subdot;': '\u2abd',
1909 'subedot;': '\u2ac3',
1910 'submult;': '\u2ac1',
1913 'subplus;': '\u2abf',
1914 'subrarr;': '\u2979',
1915 'Subset;': '\u22d0',
1916 'subset;': '\u2282',
1917 'subseteq;': '\u2286',
1918 'subseteqq;': '\u2ac5',
1919 'SubsetEqual;': '\u2286',
1920 'subsetneq;': '\u228a',
1921 'subsetneqq;': '\u2acb',
1922 'subsim;': '\u2ac7',
1923 'subsub;': '\u2ad5',
1924 'subsup;': '\u2ad3',
1926 'succapprox;': '\u2ab8',
1927 'succcurlyeq;': '\u227d',
1928 'Succeeds;': '\u227b',
1929 'SucceedsEqual;': '\u2ab0',
1930 'SucceedsSlantEqual;': '\u227d',
1931 'SucceedsTilde;': '\u227f',
1932 'succeq;': '\u2ab0',
1933 'succnapprox;': '\u2aba',
1934 'succneqq;': '\u2ab6',
1935 'succnsim;': '\u22e9',
1936 'succsim;': '\u227f',
1937 'SuchThat;': '\u220b',
1949 'supdot;': '\u2abe',
1950 'supdsub;': '\u2ad8',
1953 'supedot;': '\u2ac4',
1954 'Superset;': '\u2283',
1955 'SupersetEqual;': '\u2287',
1956 'suphsol;': '\u27c9',
1957 'suphsub;': '\u2ad7',
1958 'suplarr;': '\u297b',
1959 'supmult;': '\u2ac2',
1962 'supplus;': '\u2ac0',
1963 'Supset;': '\u22d1',
1964 'supset;': '\u2283',
1965 'supseteq;': '\u2287',
1966 'supseteqq;': '\u2ac6',
1967 'supsetneq;': '\u228b',
1968 'supsetneqq;': '\u2acc',
1969 'supsim;': '\u2ac8',
1970 'supsub;': '\u2ad4',
1971 'supsup;': '\u2ad6',
1972 'swarhk;': '\u2926',
1975 'swarrow;': '\u2199',
1976 'swnwar;': '\u292a',
1980 'target;': '\u2316',
1984 'Tcaron;': '\u0164',
1985 'tcaron;': '\u0165',
1986 'Tcedil;': '\u0162',
1987 'tcedil;': '\u0163',
1991 'telrec;': '\u2315',
1992 'Tfr;': '\U0001d517',
1993 'tfr;': '\U0001d531',
1994 'there4;': '\u2234',
1995 'Therefore;': '\u2234',
1996 'therefore;': '\u2234',
1999 'thetasym;': '\u03d1',
2000 'thetav;': '\u03d1',
2001 'thickapprox;': '\u2248',
2002 'thicksim;': '\u223c',
2003 'ThickSpace;': '\u205f\u200a',
2004 'thinsp;': '\u2009',
2005 'ThinSpace;': '\u2009',
2007 'thksim;': '\u223c',
2014 'TildeEqual;': '\u2243',
2015 'TildeFullEqual;': '\u2245',
2016 'TildeTilde;': '\u2248',
2019 'timesb;': '\u22a0',
2020 'timesbar;': '\u2a31',
2021 'timesd;': '\u2a30',
2025 'topbot;': '\u2336',
2026 'topcir;': '\u2af1',
2027 'Topf;': '\U0001d54b',
2028 'topf;': '\U0001d565',
2029 'topfork;': '\u2ada',
2031 'tprime;': '\u2034',
2034 'triangle;': '\u25b5',
2035 'triangledown;': '\u25bf',
2036 'triangleleft;': '\u25c3',
2037 'trianglelefteq;': '\u22b4',
2038 'triangleq;': '\u225c',
2039 'triangleright;': '\u25b9',
2040 'trianglerighteq;': '\u22b5',
2041 'tridot;': '\u25ec',
2043 'triminus;': '\u2a3a',
2044 'TripleDot;': '\u20db',
2045 'triplus;': '\u2a39',
2047 'tritime;': '\u2a3b',
2048 'trpezium;': '\u23e2',
2049 'Tscr;': '\U0001d4af',
2050 'tscr;': '\U0001d4c9',
2055 'Tstrok;': '\u0166',
2056 'tstrok;': '\u0167',
2058 'twoheadleftarrow;': '\u219e',
2059 'twoheadrightarrow;': '\u21a0',
2067 'Uarrocir;': '\u2949',
2070 'Ubreve;': '\u016c',
2071 'ubreve;': '\u016d',
2079 'Udblac;': '\u0170',
2080 'udblac;': '\u0171',
2082 'ufisht;': '\u297e',
2083 'Ufr;': '\U0001d518',
2084 'ufr;': '\U0001d532',
2093 'ulcorn;': '\u231c',
2094 'ulcorner;': '\u231c',
2095 'ulcrop;': '\u230f',
2102 'UnderBrace;': '\u23df',
2103 'UnderBracket;': '\u23b5',
2104 'UnderParenthesis;': '\u23dd',
2106 'UnionPlus;': '\u228e',
2109 'Uopf;': '\U0001d54c',
2110 'uopf;': '\U0001d566',
2111 'UpArrow;': '\u2191',
2112 'Uparrow;': '\u21d1',
2113 'uparrow;': '\u2191',
2114 'UpArrowBar;': '\u2912',
2115 'UpArrowDownArrow;': '\u21c5',
2116 'UpDownArrow;': '\u2195',
2117 'Updownarrow;': '\u21d5',
2118 'updownarrow;': '\u2195',
2119 'UpEquilibrium;': '\u296e',
2120 'upharpoonleft;': '\u21bf',
2121 'upharpoonright;': '\u21be',
2123 'UpperLeftArrow;': '\u2196',
2124 'UpperRightArrow;': '\u2197',
2128 'Upsilon;': '\u03a5',
2129 'upsilon;': '\u03c5',
2131 'UpTeeArrow;': '\u21a5',
2132 'upuparrows;': '\u21c8',
2133 'urcorn;': '\u231d',
2134 'urcorner;': '\u231d',
2135 'urcrop;': '\u230e',
2139 'Uscr;': '\U0001d4b0',
2140 'uscr;': '\U0001d4ca',
2142 'Utilde;': '\u0168',
2143 'utilde;': '\u0169',
2151 'uwangle;': '\u29a7',
2152 'vangrt;': '\u299c',
2153 'varepsilon;': '\u03f5',
2154 'varkappa;': '\u03f0',
2155 'varnothing;': '\u2205',
2156 'varphi;': '\u03d5',
2158 'varpropto;': '\u221d',
2161 'varrho;': '\u03f1',
2162 'varsigma;': '\u03c2',
2163 'varsubsetneq;': '\u228a\ufe00',
2164 'varsubsetneqq;': '\u2acb\ufe00',
2165 'varsupsetneq;': '\u228b\ufe00',
2166 'varsupsetneqq;': '\u2acc\ufe00',
2167 'vartheta;': '\u03d1',
2168 'vartriangleleft;': '\u22b2',
2169 'vartriangleright;': '\u22b3',
2179 'Vdashl;': '\u2ae6',
2182 'veebar;': '\u22bb',
2184 'vellip;': '\u22ee',
2185 'Verbar;': '\u2016',
2189 'VerticalBar;': '\u2223',
2190 'VerticalLine;': '|',
2191 'VerticalSeparator;': '\u2758',
2192 'VerticalTilde;': '\u2240',
2193 'VeryThinSpace;': '\u200a',
2194 'Vfr;': '\U0001d519',
2195 'vfr;': '\U0001d533',
2197 'vnsub;': '\u2282\u20d2',
2198 'vnsup;': '\u2283\u20d2',
2199 'Vopf;': '\U0001d54d',
2200 'vopf;': '\U0001d567',
2203 'Vscr;': '\U0001d4b1',
2204 'vscr;': '\U0001d4cb',
2205 'vsubnE;': '\u2acb\ufe00',
2206 'vsubne;': '\u228a\ufe00',
2207 'vsupnE;': '\u2acc\ufe00',
2208 'vsupne;': '\u228b\ufe00',
2209 'Vvdash;': '\u22aa',
2210 'vzigzag;': '\u299a',
2213 'wedbar;': '\u2a5f',
2216 'wedgeq;': '\u2259',
2217 'weierp;': '\u2118',
2218 'Wfr;': '\U0001d51a',
2219 'wfr;': '\U0001d534',
2220 'Wopf;': '\U0001d54e',
2221 'wopf;': '\U0001d568',
2224 'wreath;': '\u2240',
2225 'Wscr;': '\U0001d4b2',
2226 'wscr;': '\U0001d4cc',
2231 'Xfr;': '\U0001d51b',
2232 'xfr;': '\U0001d535',
2242 'Xopf;': '\U0001d54f',
2243 'xopf;': '\U0001d569',
2244 'xoplus;': '\u2a01',
2245 'xotime;': '\u2a02',
2248 'Xscr;': '\U0001d4b3',
2249 'xscr;': '\U0001d4cd',
2250 'xsqcup;': '\u2a06',
2251 'xuplus;': '\u2a04',
2254 'xwedge;': '\u22c0',
2267 'Yfr;': '\U0001d51c',
2268 'yfr;': '\U0001d536',
2271 'Yopf;': '\U0001d550',
2272 'yopf;': '\U0001d56a',
2273 'Yscr;': '\U0001d4b4',
2274 'yscr;': '\U0001d4ce',
2280 'Zacute;': '\u0179',
2281 'zacute;': '\u017a',
2282 'Zcaron;': '\u017d',
2283 'zcaron;': '\u017e',
2288 'zeetrf;': '\u2128',
2289 'ZeroWidthSpace;': '\u200b',
2293 'zfr;': '\U0001d537',
2296 'zigrarr;': '\u21dd',
2298 'zopf;': '\U0001d56b',
2299 'Zscr;': '\U0001d4b5',
2300 'zscr;': '\U0001d4cf',
# Bind the HTTP client module under one version-independent name:
# http.client where it exists, otherwise the Python 2 httplib module.
try:
    import http.client as compat_http_client
except ImportError:  # Python 2
    import httplib as compat_http_client
# Bind the HTTPError exception class under one version-independent name,
# taken from urllib.error or, failing that, the Python 2 urllib2 module.
try:
    from urllib.error import HTTPError as compat_HTTPError
except ImportError:  # Python 2
    from urllib2 import HTTPError as compat_HTTPError
# Bind urlretrieve() under one version-independent name, taken from
# urllib.request or, failing that, the Python 2 urllib module.
try:
    from urllib.request import urlretrieve as compat_urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve as compat_urlretrieve
# Bind the HTMLParser class under one version-independent name, taken from
# html.parser or, failing that, the Python 2 HTMLParser module.
try:
    from html.parser import HTMLParser as compat_HTMLParser
except ImportError:  # Python 2
    from HTMLParser import HTMLParser as compat_HTMLParser
2326 from HTMLParser
import HTMLParseError
as compat_HTMLParseError
2327 except ImportError: # Python <3.4
2329 from html
.parser
import HTMLParseError
as compat_HTMLParseError
2330 except ImportError: # Python >3.4
2332 # HTMLParseError has been deprecated in Python 3.3 and removed in
2333 # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
2334 # and uniform cross-version exception handling
2335 class compat_HTMLParseError(Exception):
2339 from subprocess
import DEVNULL
2340 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2342 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2345 import http
.server
as compat_http_server
2347 import BaseHTTPServer
as compat_http_server
2350 compat_str
= unicode # Python 2
2355 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2356 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2357 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2358 except ImportError: # Python 2
2359 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2360 else re
.compile(r
'([\x00-\x7f]+)'))
2362 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2363 # implementations from cpython 3.4.3's stdlib. Python 2's version
2364 # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
2366 def compat_urllib_parse_unquote_to_bytes(string
):
2367 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2368 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2369 # unescaped non-ASCII characters, which URIs should not.
2371 # Is it a string-like object?
2374 if isinstance(string
, compat_str
):
2375 string
= string
.encode('utf-8')
2376 bits
= string
.split(b
'%')
2381 for item
in bits
[1:]:
2383 append(compat_urllib_parse
._hextochr
[item
[:2]])
2388 return b
''.join(res
)
2390 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2391 """Replace %xx escapes by their single-character equivalent. The optional
2392 encoding and errors parameters specify how to decode percent-encoded
2393 sequences into Unicode characters, as accepted by the bytes.decode()
2395 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2396 sequences are replaced by a placeholder character.
2398 unquote('abc%20def') -> 'abc def'.
2400 if '%' not in string
:
2403 if encoding
is None:
2407 bits
= _asciire
.split(string
)
2410 for i
in range(1, len(bits
), 2):
2411 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Decode a percent-encoded form value, treating '+' as a space.

    Every plus sign is turned into a space first; the result is then handed
    to compat_urllib_parse_unquote() together with *encoding* and *errors*.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    with_spaces = string.replace('+', ' ')
    return compat_urllib_parse_unquote(with_spaces, encoding, errors)
2425 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2426 except ImportError: # Python 2
2427 # Python 2 will choke in urlencode on a mixture of byte and unicode strings.
2428 # Possible solutions are to either port it from Python 3 with all
2429 # its friends or to manually ensure the input query contains only byte strings.
2430 # We will stick with the latter, thus recursively encoding the whole query.
2431 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2433 if isinstance(e
, dict):
2435 elif isinstance(e
, (list, tuple,)):
2436 list_e
= encode_list(e
)
2437 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2438 elif isinstance(e
, compat_str
):
2439 e
= e
.encode(encoding
)
2443 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2446 return [encode_elem(e
) for e
in l
]
2448 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2451 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2452 except ImportError: # Python < 3.4
2453 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2454 class compat_urllib_request_DataHandler(compat_urllib_request
.BaseHandler
):
2455 def data_open(self
, req
):
2456 # data URLs as specified in RFC 2397.
2458 # ignores POSTed data
2461 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2462 # mediatype := [ type "/" subtype ] *( ";" parameter )
2464 # parameter := attribute "=" value
2465 url
= req
.get_full_url()
2467 scheme
, data
= url
.split(':', 1)
2468 mediatype
, data
= data
.split(',', 1)
2470 # even base64 encoded data URLs might be quoted so unquote in any case:
2471 data
= compat_urllib_parse_unquote_to_bytes(data
)
2472 if mediatype
.endswith(';base64'):
2473 data
= binascii
.a2b_base64(data
)
2474 mediatype
= mediatype
[:-7]
2477 mediatype
= 'text/plain;charset=US-ASCII'
2479 headers
= email
.message_from_string(
2480 'Content-type: %s\nContent-length: %d\n' % (mediatype
, len(data
)))
2482 return compat_urllib_response
.addinfourl(io
.BytesIO(data
), headers
, url
)
2485 compat_basestring
= basestring
# Python 2
2487 compat_basestring
= str
2490 compat_chr
= unichr # Python 2
2495 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2496 except ImportError: # Python 2.6
2497 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2500 etree
= xml
.etree
.ElementTree
2503 class _TreeBuilder(etree
.TreeBuilder
):
2504 def doctype(self
, name
, pubid
, system
):
2508 if sys
.version_info
[0] >= 3:
def compat_etree_fromstring(text):
    # Parse *text* with an XMLParser whose target is our own _TreeBuilder.
    builder = _TreeBuilder()
    xml_parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=xml_parser)
2512 # python 2.x tries to encode unicode strings with ascii (see the
2513 # XMLParser._fixtext method)
2515 _etree_iter
= etree
.Element
.iter
2516 except AttributeError: # Python <=2.6
2517 def _etree_iter(root
):
2518 for el
in root
.findall('*'):
2520 for sub
in _etree_iter(el
):
2523 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2525 def _XML(text
, parser
=None):
2527 parser
= etree
.XMLParser(target
=_TreeBuilder())
2529 return parser
.close()
2531 def _element_factory(*args
, **kwargs
):
2532 el
= etree
.Element(*args
, **kwargs
)
2533 for k
, v
in el
.items():
2534 if isinstance(v
, bytes):
2535 el
.set(k
, v
.decode('utf-8'))
2538 def compat_etree_fromstring(text
):
2539 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2540 for el
in _etree_iter(doc
):
2541 if el
.text
is not None and isinstance(el
.text
, bytes):
2542 el
.text
= el
.text
.decode('utf-8')
2545 if hasattr(etree
, 'register_namespace'):
2546 compat_etree_register_namespace
= etree
.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Map a namespace URI to a serialization prefix in etree's registry.

    The registry (etree._namespace_map) is global. Any entry that already
    uses *uri* as its key or *prefix* as its value is dropped before the new
    mapping is stored.

    Raises ValueError when *prefix* has the form "ns<digits>".
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    registry = etree._namespace_map
    # Collect first, delete afterwards: the mapping must not change size
    # while it is being iterated.
    stale_uris = [
        known_uri for known_uri, known_prefix in registry.items()
        if known_uri == uri or known_prefix == prefix]
    for known_uri in stale_uris:
        del registry[known_uri]
    registry[uri] = prefix
2563 if sys
.version_info
< (2, 7):
2564 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2565 # .//node does not match if a node is a direct child of . !
2566 def compat_xpath(xpath
):
2567 if isinstance(xpath
, compat_str
):
2568 xpath
= xpath
.encode('ascii')
2571 compat_xpath
= lambda xpath
: xpath
2574 from urllib
.parse
import parse_qs
as compat_parse_qs
2575 except ImportError: # Python 2
2576 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2577 # Python 2's version is apparently totally broken
2579 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2580 encoding
='utf-8', errors
='replace'):
2581 qs
, _coerce_result
= qs
, compat_str
2582 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2584 for name_value
in pairs
:
2585 if not name_value
and not strict_parsing
:
2587 nv
= name_value
.split('=', 1)
2590 raise ValueError('bad query field: %r' % (name_value
,))
2591 # Handle case of a control-name with no equal sign
2592 if keep_blank_values
:
2596 if len(nv
[1]) or keep_blank_values
:
2597 name
= nv
[0].replace('+', ' ')
2598 name
= compat_urllib_parse_unquote(
2599 name
, encoding
=encoding
, errors
=errors
)
2600 name
= _coerce_result(name
)
2601 value
= nv
[1].replace('+', ' ')
2602 value
= compat_urllib_parse_unquote(
2603 value
, encoding
=encoding
, errors
=errors
)
2604 value
= _coerce_result(value
)
2605 r
.append((name
, value
))
2608 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
2609 encoding
='utf-8', errors
='replace'):
2611 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
2612 encoding
=encoding
, errors
=errors
)
2613 for name
, value
in pairs
:
2614 if name
in parsed_result
:
2615 parsed_result
[name
].append(value
)
2617 parsed_result
[name
] = [value
]
2618 return parsed_result
2621 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2624 if compat_os_name
== 'nt':
def compat_shlex_quote(s):
    """Quote *s* as a single command-line argument (variant used on 'nt').

    Strings made only of word characters, '-', '_', '.' and '/' are returned
    untouched; anything else is wrapped in double quotes with embedded
    double quotes backslash-escaped.
    """
    if re.match(r'^[-_\w./]+$', s):
        return s
    escaped = s.replace('"', '\\"')
    return '"%s"' % escaped
2629 from shlex
import quote
as compat_shlex_quote
2630 except ImportError: # Python < 3.3
2631 def compat_shlex_quote(s
):
2632 if re
.match(r
'^[-_\w./]+$', s
):
2635 return "'" + s
.replace("'", "'\"'\"'") + "'"
2639 args
= shlex
.split('äøę')
2640 assert (isinstance(args
, list) and
2641 isinstance(args
[0], compat_str
) and
2642 args
[0] == 'äøę')
2643 compat_shlex_split
= shlex
.split
2644 except (AssertionError, UnicodeEncodeError):
2645 # Working around shlex issue with unicode strings on some python 2
2646 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split(), going through UTF-8 byte strings.

    Text input is encoded to UTF-8 before being handed to shlex.split(), and
    every resulting token is decoded back from UTF-8.
    """
    raw = s.encode('utf-8') if isinstance(s, compat_str) else s
    tokens = shlex.split(raw, comments, posix)
    return [token.decode('utf-8') for token in tokens]
2660 if sys
.version_info
>= (3, 0):
2661 compat_getenv
= os
.getenv
2662 compat_expanduser
= os
.path
.expanduser
2664 def compat_setenv(key
, value
, env
=os
.environ
):
2667 # Environment variables should be decoded with filesystem encoding.
2668 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2670 def compat_getenv(key
, default
=None):
2671 from .utils
import get_filesystem_encoding
2672 env
= os
.getenv(key
, default
)
2674 env
= env
.decode(get_filesystem_encoding())
2677 def compat_setenv(key
, value
, env
=os
.environ
):
2679 from .utils
import get_filesystem_encoding
2680 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2681 env
[encode(key
)] = encode(value
)
2683 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2684 # environment variables with filesystem encoding. We will work around this by
2685 # providing adjusted implementations.
2686 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2687 # for different platforms with correct environment variables decoding.
2689 if compat_os_name
== 'posix':
2690 def compat_expanduser(path
):
2691 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2693 if not path
.startswith('~'):
2695 i
= path
.find('/', 1)
2699 if 'HOME' not in os
.environ
:
2701 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2703 userhome
= compat_getenv('HOME')
2707 pwent
= pwd
.getpwnam(path
[1:i
])
2710 userhome
= pwent
.pw_dir
2711 userhome
= userhome
.rstrip('/')
2712 return (userhome
+ path
[i
:]) or '/'
2713 elif compat_os_name
in ('nt', 'ce'):
2714 def compat_expanduser(path
):
2715 """Expand ~ and ~user constructs.
2717 If user or $HOME is unknown, do nothing."""
2721 while i
< n
and path
[i
] not in '/\\':
2724 if 'HOME' in os
.environ
:
2725 userhome
= compat_getenv('HOME')
2726 elif 'USERPROFILE' in os
.environ
:
2727 userhome
= compat_getenv('USERPROFILE')
2728 elif 'HOMEPATH' not in os
.environ
:
2732 drive
= compat_getenv('HOMEDRIVE')
2735 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2738 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2740 return userhome
+ path
[i
:]
2742 compat_expanduser
= os
.path
.expanduser
2745 if sys
.version_info
< (3, 0):
def compat_print(s):
    # Encode the text ourselves before printing; characters the preferred
    # encoding cannot represent become XML character references.
    from .utils import preferredencoding
    console_encoding = preferredencoding()
    print(s.encode(console_encoding, 'xmlcharrefreplace'))
2750 def compat_print(s
):
2751 assert isinstance(s
, compat_str
)
2755 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass() with a byte-string prompt.

    A text *prompt* is encoded with the preferred encoding first; any other
    prompt, and all remaining arguments, are passed through unchanged.
    """
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    from .utils import preferredencoding
    encoded_prompt = prompt.encode(preferredencoding())
    return getpass.getpass(encoded_prompt, *args, **kwargs)
2762 compat_getpass
= getpass
.getpass
2765 compat_input
= raw_input
2766 except NameError: # Python 3
2767 compat_input
= input
2769 # Python < 2.6.5 requires kwargs keys to be bytes
2773 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a new dict with every key of *kwargs* passed through bytes().

    Values are kept as they are.
    """
    converted = {}
    for key, value in kwargs.items():
        converted[bytes(key)] = value
    return converted
2778 compat_kwargs
= lambda kwargs
: kwargs
2782 compat_numeric_types
= (int, float, long, complex)
2783 except NameError: # Python 3
2784 compat_numeric_types
= (int, float, complex)
2787 if sys
.version_info
< (2, 7):
2788 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2789 host
, port
= address
2791 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2792 af
, socktype
, proto
, canonname
, sa
= res
2795 sock
= socket
.socket(af
, socktype
, proto
)
2796 sock
.settimeout(timeout
)
2798 sock
.bind(source_address
)
2801 except socket
.error
as _
:
2803 if sock
is not None:
2808 raise socket
.error('getaddrinfo returns an empty list')
2810 compat_socket_create_connection
= socket
.create_connection
2813 # Fix https://github.com/rg3/youtube-dl/issues/4223
2814 # See http://bugs.python.org/issue9161 for what is broken
2815 def workaround_optparse_bug9161():
2816 op
= optparse
.OptionParser()
2817 og
= optparse
.OptionGroup(op
, 'foo')
2821 real_add_option
= optparse
.OptionGroup
.add_option
2823 def _compat_add_option(self
, *args
, **kwargs
):
2825 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2827 bargs
= [enc(a
) for a
in args
]
2829 (k
, enc(v
)) for k
, v
in kwargs
.items())
2830 return real_add_option(self
, *bargs
, **bkwargs
)
2831 optparse
.OptionGroup
.add_option
= _compat_add_option
2834 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2835 compat_get_terminal_size
= shutil
.get_terminal_size
2837 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2839 def compat_get_terminal_size(fallback
=(80, 24)):
2840 columns
= compat_getenv('COLUMNS')
2842 columns
= int(columns
)
2845 lines
= compat_getenv('LINES')
2851 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2853 sp
= subprocess
.Popen(
2855 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2856 out
, err
= sp
.communicate()
2857 _lines
, _columns
= map(int, out
.split())
2859 _columns
, _lines
= _terminal_size(*fallback
)
2861 if columns
is None or columns
<= 0:
2863 if lines
is None or lines
<= 0:
2865 return _terminal_size(columns
, lines
)
2868 itertools
.count(start
=0, step
=1)
2869 compat_itertools_count
= itertools
.count
2870 except TypeError: # Python 2.6
2871 def compat_itertools_count(start
=0, step
=1):
2877 if sys
.version_info
>= (3, 0):
2878 from tokenize
import tokenize
as compat_tokenize_tokenize
2880 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2884 struct
.pack('!I', 0)
2886 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2887 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """struct.pack() wrapper that also accepts a text format string.

    A text *spec* is encoded to ASCII bytes before the call; any other
    *spec* is forwarded untouched along with *args*.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def compat_struct_unpack(spec, *args):
    """struct.unpack() wrapper that also accepts a text format string.

    A text *spec* is encoded to ASCII bytes before the call; any other
    *spec* is forwarded untouched along with *args*.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(fmt, *args)
2898 compat_struct_pack
= struct
.pack
2899 compat_struct_unpack
= struct
.unpack
2902 from future_builtins
import zip as compat_zip
2903 except ImportError: # not 2.6+ or is 3.x
2905 from itertools
import izip
as compat_zip
# < 2.5 or 3.x
2910 'compat_HTMLParseError',
2911 'compat_HTMLParser',
2913 'compat_basestring',
2917 'compat_etree_fromstring',
2918 'compat_etree_register_namespace',
2919 'compat_expanduser',
2920 'compat_get_terminal_size',
2923 'compat_html_entities',
2924 'compat_html_entities_html5',
2925 'compat_http_client',
2926 'compat_http_server',
2928 'compat_itertools_count',
2930 'compat_numeric_types',
2936 'compat_shlex_quote',
2937 'compat_shlex_split',
2938 'compat_socket_create_connection',
2940 'compat_struct_pack',
2941 'compat_struct_unpack',
2942 'compat_subprocess_get_DEVNULL',
2943 'compat_tokenize_tokenize',
2944 'compat_urllib_error',
2945 'compat_urllib_parse',
2946 'compat_urllib_parse_unquote',
2947 'compat_urllib_parse_unquote_plus',
2948 'compat_urllib_parse_unquote_to_bytes',
2949 'compat_urllib_parse_urlencode',
2950 'compat_urllib_parse_urlparse',
2951 'compat_urllib_request',
2952 'compat_urllib_request_DataHandler',
2953 'compat_urllib_response',
2955 'compat_urlretrieve',
2956 'compat_xml_parse_error',
2959 'workaround_optparse_bug9161',