2 from __future__
import unicode_literals
22 import xml
.etree
.ElementTree
26 import urllib
.request
as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2
as compat_urllib_request
31 import urllib
.error
as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2
as compat_urllib_error
36 import urllib
.parse
as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib
as compat_urllib_parse
41 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse
import urlparse
as compat_urllib_parse_urlparse
46 import urllib
.parse
as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse
as compat_urlparse
51 import urllib
.response
as compat_urllib_response
52 except ImportError: # Python 2
53 import urllib
as compat_urllib_response
56 import http
.cookiejar
as compat_cookiejar
57 except ImportError: # Python 2
58 import cookielib
as compat_cookiejar
61 import http
.cookies
as compat_cookies
62 except ImportError: # Python 2
63 import Cookie
as compat_cookies
66 import html
.entities
as compat_html_entities
67 except ImportError: # Python 2
68 import htmlentitydefs
as compat_html_entities
71 compat_html_entities_html5
= compat_html_entities
.html5
72 except AttributeError:
73 # Copied from CPython 3.5.1 html/entities.py
74 compat_html_entities_html5
= {
83 'acE;': '\u223e\u0333',
103 'alefsym;': '\u2135',
118 'andslope;': '\u2a58',
124 'angmsdaa;': '\u29a8',
125 'angmsdab;': '\u29a9',
126 'angmsdac;': '\u29aa',
127 'angmsdad;': '\u29ab',
128 'angmsdae;': '\u29ac',
129 'angmsdaf;': '\u29ad',
130 'angmsdag;': '\u29ae',
131 'angmsdah;': '\u29af',
133 'angrtvb;': '\u22be',
134 'angrtvbd;': '\u299d',
137 'angzarr;': '\u237c',
140 'Aopf;': '\U0001d538',
141 'aopf;': '\U0001d552',
148 'ApplyFunction;': '\u2061',
150 'approxeq;': '\u224a',
155 'Ascr;': '\U0001d49c',
156 'ascr;': '\U0001d4b6',
160 'asympeq;': '\u224d',
169 'awconint;': '\u2233',
171 'backcong;': '\u224c',
172 'backepsilon;': '\u03f6',
173 'backprime;': '\u2035',
174 'backsim;': '\u223d',
175 'backsimeq;': '\u22cd',
176 'Backslash;': '\u2216',
181 'barwedge;': '\u2305',
183 'bbrktbrk;': '\u23b6',
189 'Because;': '\u2235',
190 'because;': '\u2235',
191 'bemptyv;': '\u29b0',
194 'Bernoullis;': '\u212c',
198 'between;': '\u226c',
199 'Bfr;': '\U0001d505',
200 'bfr;': '\U0001d51f',
202 'bigcirc;': '\u25ef',
204 'bigodot;': '\u2a00',
205 'bigoplus;': '\u2a01',
206 'bigotimes;': '\u2a02',
207 'bigsqcup;': '\u2a06',
208 'bigstar;': '\u2605',
209 'bigtriangledown;': '\u25bd',
210 'bigtriangleup;': '\u25b3',
211 'biguplus;': '\u2a04',
213 'bigwedge;': '\u22c0',
215 'blacklozenge;': '\u29eb',
216 'blacksquare;': '\u25aa',
217 'blacktriangle;': '\u25b4',
218 'blacktriangledown;': '\u25be',
219 'blacktriangleleft;': '\u25c2',
220 'blacktriangleright;': '\u25b8',
227 'bnequiv;': '\u2261\u20e5',
230 'Bopf;': '\U0001d539',
231 'bopf;': '\U0001d553',
254 'boxminus;': '\u229f',
255 'boxplus;': '\u229e',
256 'boxtimes;': '\u22a0',
285 'bscr;': '\U0001d4b7',
291 'bsolhsub;': '\u27c8',
304 'capbrcup;': '\u2a49',
308 'CapitalDifferentialD;': '\u2145',
309 'caps;': '\u2229\ufe00',
312 'Cayleys;': '\u212d',
322 'Cconint;': '\u2230',
324 'ccupssm;': '\u2a50',
330 'cemptyv;': '\u29b2',
333 'CenterDot;': '\xb7',
334 'centerdot;': '\xb7',
336 'cfr;': '\U0001d520',
340 'checkmark;': '\u2713',
346 'circlearrowleft;': '\u21ba',
347 'circlearrowright;': '\u21bb',
348 'circledast;': '\u229b',
349 'circledcirc;': '\u229a',
350 'circleddash;': '\u229d',
351 'CircleDot;': '\u2299',
353 'circledS;': '\u24c8',
354 'CircleMinus;': '\u2296',
355 'CirclePlus;': '\u2295',
356 'CircleTimes;': '\u2297',
359 'cirfnint;': '\u2a10',
361 'cirscir;': '\u29c2',
362 'ClockwiseContourIntegral;': '\u2232',
363 'CloseCurlyDoubleQuote;': '\u201d',
364 'CloseCurlyQuote;': '\u2019',
366 'clubsuit;': '\u2663',
371 'coloneq;': '\u2254',
376 'complement;': '\u2201',
377 'complexes;': '\u2102',
379 'congdot;': '\u2a6d',
380 'Congruent;': '\u2261',
383 'ContourIntegral;': '\u222e',
385 'copf;': '\U0001d554',
387 'Coproduct;': '\u2210',
393 'CounterClockwiseContourIntegral;': '\u2233',
397 'Cscr;': '\U0001d49e',
398 'cscr;': '\U0001d4b8',
404 'cudarrl;': '\u2938',
405 'cudarrr;': '\u2935',
409 'cularrp;': '\u293d',
412 'cupbrcap;': '\u2a48',
418 'cups;': '\u222a\ufe00',
420 'curarrm;': '\u293c',
421 'curlyeqprec;': '\u22de',
422 'curlyeqsucc;': '\u22df',
423 'curlyvee;': '\u22ce',
424 'curlywedge;': '\u22cf',
427 'curvearrowleft;': '\u21b6',
428 'curvearrowright;': '\u21b7',
431 'cwconint;': '\u2232',
443 'dbkarow;': '\u290f',
451 'ddagger;': '\u2021',
453 'DDotrahd;': '\u2911',
454 'ddotseq;': '\u2a77',
460 'demptyv;': '\u29b1',
462 'Dfr;': '\U0001d507',
463 'dfr;': '\U0001d521',
467 'DiacriticalAcute;': '\xb4',
468 'DiacriticalDot;': '\u02d9',
469 'DiacriticalDoubleAcute;': '\u02dd',
470 'DiacriticalGrave;': '`',
471 'DiacriticalTilde;': '\u02dc',
473 'Diamond;': '\u22c4',
474 'diamond;': '\u22c4',
475 'diamondsuit;': '\u2666',
478 'DifferentialD;': '\u2146',
479 'digamma;': '\u03dd',
484 'divideontimes;': '\u22c7',
491 'Dopf;': '\U0001d53b',
492 'dopf;': '\U0001d555',
497 'doteqdot;': '\u2251',
498 'DotEqual;': '\u2250',
499 'dotminus;': '\u2238',
500 'dotplus;': '\u2214',
501 'dotsquare;': '\u22a1',
502 'doublebarwedge;': '\u2306',
503 'DoubleContourIntegral;': '\u222f',
504 'DoubleDot;': '\xa8',
505 'DoubleDownArrow;': '\u21d3',
506 'DoubleLeftArrow;': '\u21d0',
507 'DoubleLeftRightArrow;': '\u21d4',
508 'DoubleLeftTee;': '\u2ae4',
509 'DoubleLongLeftArrow;': '\u27f8',
510 'DoubleLongLeftRightArrow;': '\u27fa',
511 'DoubleLongRightArrow;': '\u27f9',
512 'DoubleRightArrow;': '\u21d2',
513 'DoubleRightTee;': '\u22a8',
514 'DoubleUpArrow;': '\u21d1',
515 'DoubleUpDownArrow;': '\u21d5',
516 'DoubleVerticalBar;': '\u2225',
517 'DownArrow;': '\u2193',
518 'Downarrow;': '\u21d3',
519 'downarrow;': '\u2193',
520 'DownArrowBar;': '\u2913',
521 'DownArrowUpArrow;': '\u21f5',
522 'DownBreve;': '\u0311',
523 'downdownarrows;': '\u21ca',
524 'downharpoonleft;': '\u21c3',
525 'downharpoonright;': '\u21c2',
526 'DownLeftRightVector;': '\u2950',
527 'DownLeftTeeVector;': '\u295e',
528 'DownLeftVector;': '\u21bd',
529 'DownLeftVectorBar;': '\u2956',
530 'DownRightTeeVector;': '\u295f',
531 'DownRightVector;': '\u21c1',
532 'DownRightVectorBar;': '\u2957',
533 'DownTee;': '\u22a4',
534 'DownTeeArrow;': '\u21a7',
535 'drbkarow;': '\u2910',
538 'Dscr;': '\U0001d49f',
539 'dscr;': '\U0001d4b9',
550 'dwangle;': '\u29a6',
553 'dzigrarr;': '\u27ff',
575 'Efr;': '\U0001d508',
576 'efr;': '\U0001d522',
585 'Element;': '\u2208',
586 'elinters;': '\u23e7',
593 'emptyset;': '\u2205',
594 'EmptySmallSquare;': '\u25fb',
596 'EmptyVerySmallSquare;': '\u25ab',
605 'Eopf;': '\U0001d53c',
606 'eopf;': '\U0001d556',
611 'Epsilon;': '\u0395',
612 'epsilon;': '\u03b5',
615 'eqcolon;': '\u2255',
617 'eqslantgtr;': '\u2a96',
618 'eqslantless;': '\u2a95',
621 'EqualTilde;': '\u2242',
623 'Equilibrium;': '\u21cc',
625 'equivDD;': '\u2a78',
626 'eqvparsl;': '\u29e5',
648 'expectation;': '\u2130',
649 'ExponentialE;': '\u2147',
650 'exponentiale;': '\u2147',
651 'fallingdotseq;': '\u2252',
658 'Ffr;': '\U0001d509',
659 'ffr;': '\U0001d523',
661 'FilledSmallSquare;': '\u25fc',
662 'FilledVerySmallSquare;': '\u25aa',
668 'Fopf;': '\U0001d53d',
669 'fopf;': '\U0001d557',
674 'Fouriertrf;': '\u2131',
675 'fpartint;': '\u2a0d',
697 'fscr;': '\U0001d4bb',
719 'geqslant;': '\u2a7e',
723 'gesdoto;': '\u2a82',
724 'gesdotol;': '\u2a84',
725 'gesl;': '\u22db\ufe00',
727 'Gfr;': '\U0001d50a',
728 'gfr;': '\U0001d524',
740 'gnapprox;': '\u2a8a',
746 'Gopf;': '\U0001d53e',
747 'gopf;': '\U0001d558',
749 'GreaterEqual;': '\u2265',
750 'GreaterEqualLess;': '\u22db',
751 'GreaterFullEqual;': '\u2267',
752 'GreaterGreater;': '\u2aa2',
753 'GreaterLess;': '\u2277',
754 'GreaterSlantEqual;': '\u2a7e',
755 'GreaterTilde;': '\u2273',
756 'Gscr;': '\U0001d4a2',
770 'gtquest;': '\u2a7c',
771 'gtrapprox;': '\u2a86',
774 'gtreqless;': '\u22db',
775 'gtreqqless;': '\u2a8c',
776 'gtrless;': '\u2277',
778 'gvertneqq;': '\u2269\ufe00',
779 'gvnE;': '\u2269\ufe00',
788 'harrcir;': '\u2948',
795 'heartsuit;': '\u2665',
799 'hfr;': '\U0001d525',
800 'HilbertSpace;': '\u210b',
801 'hksearow;': '\u2925',
802 'hkswarow;': '\u2926',
805 'hookleftarrow;': '\u21a9',
806 'hookrightarrow;': '\u21aa',
808 'hopf;': '\U0001d559',
810 'HorizontalLine;': '\u2500',
812 'hscr;': '\U0001d4bd',
816 'HumpDownHump;': '\u224e',
817 'HumpEqual;': '\u224f',
838 'ifr;': '\U0001d526',
854 'ImaginaryI;': '\u2148',
855 'imagline;': '\u2110',
856 'imagpart;': '\u2111',
860 'Implies;': '\u21d2',
864 'infintie;': '\u29dd',
869 'integers;': '\u2124',
870 'Integral;': '\u222b',
871 'intercal;': '\u22ba',
872 'Intersection;': '\u22c2',
873 'intlarhk;': '\u2a17',
874 'intprod;': '\u2a3c',
875 'InvisibleComma;': '\u2063',
876 'InvisibleTimes;': '\u2062',
881 'Iopf;': '\U0001d540',
882 'iopf;': '\U0001d55a',
889 'iscr;': '\U0001d4be',
891 'isindot;': '\u22f5',
909 'Jfr;': '\U0001d50d',
910 'jfr;': '\U0001d527',
912 'Jopf;': '\U0001d541',
913 'jopf;': '\U0001d55b',
914 'Jscr;': '\U0001d4a5',
915 'jscr;': '\U0001d4bf',
927 'Kfr;': '\U0001d50e',
928 'kfr;': '\U0001d528',
934 'Kopf;': '\U0001d542',
935 'kopf;': '\U0001d55c',
936 'Kscr;': '\U0001d4a6',
937 'kscr;': '\U0001d4c0',
941 'laemptyv;': '\u29b4',
950 'Laplacetrf;': '\u2112',
957 'larrbfs;': '\u291f',
962 'larrsim;': '\u2973',
968 'lates;': '\u2aad\ufe00',
975 'lbrksld;': '\u298f',
976 'lbrkslu;': '\u298d',
988 'ldrdhar;': '\u2967',
989 'ldrushar;': '\u294b',
993 'LeftAngleBracket;': '\u27e8',
994 'LeftArrow;': '\u2190',
995 'Leftarrow;': '\u21d0',
996 'leftarrow;': '\u2190',
997 'LeftArrowBar;': '\u21e4',
998 'LeftArrowRightArrow;': '\u21c6',
999 'leftarrowtail;': '\u21a2',
1000 'LeftCeiling;': '\u2308',
1001 'LeftDoubleBracket;': '\u27e6',
1002 'LeftDownTeeVector;': '\u2961',
1003 'LeftDownVector;': '\u21c3',
1004 'LeftDownVectorBar;': '\u2959',
1005 'LeftFloor;': '\u230a',
1006 'leftharpoondown;': '\u21bd',
1007 'leftharpoonup;': '\u21bc',
1008 'leftleftarrows;': '\u21c7',
1009 'LeftRightArrow;': '\u2194',
1010 'Leftrightarrow;': '\u21d4',
1011 'leftrightarrow;': '\u2194',
1012 'leftrightarrows;': '\u21c6',
1013 'leftrightharpoons;': '\u21cb',
1014 'leftrightsquigarrow;': '\u21ad',
1015 'LeftRightVector;': '\u294e',
1016 'LeftTee;': '\u22a3',
1017 'LeftTeeArrow;': '\u21a4',
1018 'LeftTeeVector;': '\u295a',
1019 'leftthreetimes;': '\u22cb',
1020 'LeftTriangle;': '\u22b2',
1021 'LeftTriangleBar;': '\u29cf',
1022 'LeftTriangleEqual;': '\u22b4',
1023 'LeftUpDownVector;': '\u2951',
1024 'LeftUpTeeVector;': '\u2960',
1025 'LeftUpVector;': '\u21bf',
1026 'LeftUpVectorBar;': '\u2958',
1027 'LeftVector;': '\u21bc',
1028 'LeftVectorBar;': '\u2952',
1033 'leqslant;': '\u2a7d',
1036 'lesdot;': '\u2a7f',
1037 'lesdoto;': '\u2a81',
1038 'lesdotor;': '\u2a83',
1039 'lesg;': '\u22da\ufe00',
1040 'lesges;': '\u2a93',
1041 'lessapprox;': '\u2a85',
1042 'lessdot;': '\u22d6',
1043 'lesseqgtr;': '\u22da',
1044 'lesseqqgtr;': '\u2a8b',
1045 'LessEqualGreater;': '\u22da',
1046 'LessFullEqual;': '\u2266',
1047 'LessGreater;': '\u2276',
1048 'lessgtr;': '\u2276',
1049 'LessLess;': '\u2aa1',
1050 'lesssim;': '\u2272',
1051 'LessSlantEqual;': '\u2a7d',
1052 'LessTilde;': '\u2272',
1053 'lfisht;': '\u297c',
1054 'lfloor;': '\u230a',
1055 'Lfr;': '\U0001d50f',
1056 'lfr;': '\U0001d529',
1062 'lharul;': '\u296a',
1069 'llcorner;': '\u231e',
1070 'Lleftarrow;': '\u21da',
1071 'llhard;': '\u296b',
1073 'Lmidot;': '\u013f',
1074 'lmidot;': '\u0140',
1075 'lmoust;': '\u23b0',
1076 'lmoustache;': '\u23b0',
1078 'lnapprox;': '\u2a89',
1087 'LongLeftArrow;': '\u27f5',
1088 'Longleftarrow;': '\u27f8',
1089 'longleftarrow;': '\u27f5',
1090 'LongLeftRightArrow;': '\u27f7',
1091 'Longleftrightarrow;': '\u27fa',
1092 'longleftrightarrow;': '\u27f7',
1093 'longmapsto;': '\u27fc',
1094 'LongRightArrow;': '\u27f6',
1095 'Longrightarrow;': '\u27f9',
1096 'longrightarrow;': '\u27f6',
1097 'looparrowleft;': '\u21ab',
1098 'looparrowright;': '\u21ac',
1100 'Lopf;': '\U0001d543',
1101 'lopf;': '\U0001d55d',
1102 'loplus;': '\u2a2d',
1103 'lotimes;': '\u2a34',
1104 'lowast;': '\u2217',
1106 'LowerLeftArrow;': '\u2199',
1107 'LowerRightArrow;': '\u2198',
1109 'lozenge;': '\u25ca',
1112 'lparlt;': '\u2993',
1114 'lrcorner;': '\u231f',
1116 'lrhard;': '\u296d',
1119 'lsaquo;': '\u2039',
1121 'lscr;': '\U0001d4c1',
1129 'lsquor;': '\u201a',
1130 'Lstrok;': '\u0141',
1131 'lstrok;': '\u0142',
1140 'lthree;': '\u22cb',
1141 'ltimes;': '\u22c9',
1142 'ltlarr;': '\u2976',
1143 'ltquest;': '\u2a7b',
1147 'ltrPar;': '\u2996',
1148 'lurdshar;': '\u294a',
1149 'luruhar;': '\u2966',
1150 'lvertneqq;': '\u2268\ufe00',
1151 'lvnE;': '\u2268\ufe00',
1156 'maltese;': '\u2720',
1159 'mapsto;': '\u21a6',
1160 'mapstodown;': '\u21a7',
1161 'mapstoleft;': '\u21a4',
1162 'mapstoup;': '\u21a5',
1163 'marker;': '\u25ae',
1164 'mcomma;': '\u2a29',
1169 'measuredangle;': '\u2221',
1170 'MediumSpace;': '\u205f',
1171 'Mellintrf;': '\u2133',
1172 'Mfr;': '\U0001d510',
1173 'mfr;': '\U0001d52a',
1179 'midcir;': '\u2af0',
1183 'minusb;': '\u229f',
1184 'minusd;': '\u2238',
1185 'minusdu;': '\u2a2a',
1186 'MinusPlus;': '\u2213',
1189 'mnplus;': '\u2213',
1190 'models;': '\u22a7',
1191 'Mopf;': '\U0001d544',
1192 'mopf;': '\U0001d55e',
1195 'mscr;': '\U0001d4c2',
1196 'mstpos;': '\u223e',
1199 'multimap;': '\u22b8',
1202 'Nacute;': '\u0143',
1203 'nacute;': '\u0144',
1204 'nang;': '\u2220\u20d2',
1206 'napE;': '\u2a70\u0338',
1207 'napid;': '\u224b\u0338',
1209 'napprox;': '\u2249',
1211 'natural;': '\u266e',
1212 'naturals;': '\u2115',
1215 'nbump;': '\u224e\u0338',
1216 'nbumpe;': '\u224f\u0338',
1218 'Ncaron;': '\u0147',
1219 'ncaron;': '\u0148',
1220 'Ncedil;': '\u0145',
1221 'ncedil;': '\u0146',
1223 'ncongdot;': '\u2a6d\u0338',
1229 'nearhk;': '\u2924',
1232 'nearrow;': '\u2197',
1233 'nedot;': '\u2250\u0338',
1234 'NegativeMediumSpace;': '\u200b',
1235 'NegativeThickSpace;': '\u200b',
1236 'NegativeThinSpace;': '\u200b',
1237 'NegativeVeryThinSpace;': '\u200b',
1238 'nequiv;': '\u2262',
1239 'nesear;': '\u2928',
1240 'nesim;': '\u2242\u0338',
1241 'NestedGreaterGreater;': '\u226b',
1242 'NestedLessLess;': '\u226a',
1244 'nexist;': '\u2204',
1245 'nexists;': '\u2204',
1246 'Nfr;': '\U0001d511',
1247 'nfr;': '\U0001d52b',
1248 'ngE;': '\u2267\u0338',
1251 'ngeqq;': '\u2267\u0338',
1252 'ngeqslant;': '\u2a7e\u0338',
1253 'nges;': '\u2a7e\u0338',
1254 'nGg;': '\u22d9\u0338',
1256 'nGt;': '\u226b\u20d2',
1259 'nGtv;': '\u226b\u0338',
1272 'nlE;': '\u2266\u0338',
1274 'nLeftarrow;': '\u21cd',
1275 'nleftarrow;': '\u219a',
1276 'nLeftrightarrow;': '\u21ce',
1277 'nleftrightarrow;': '\u21ae',
1279 'nleqq;': '\u2266\u0338',
1280 'nleqslant;': '\u2a7d\u0338',
1281 'nles;': '\u2a7d\u0338',
1283 'nLl;': '\u22d8\u0338',
1285 'nLt;': '\u226a\u20d2',
1288 'nltrie;': '\u22ec',
1289 'nLtv;': '\u226a\u0338',
1291 'NoBreak;': '\u2060',
1292 'NonBreakingSpace;': '\xa0',
1294 'nopf;': '\U0001d55f',
1298 'NotCongruent;': '\u2262',
1299 'NotCupCap;': '\u226d',
1300 'NotDoubleVerticalBar;': '\u2226',
1301 'NotElement;': '\u2209',
1302 'NotEqual;': '\u2260',
1303 'NotEqualTilde;': '\u2242\u0338',
1304 'NotExists;': '\u2204',
1305 'NotGreater;': '\u226f',
1306 'NotGreaterEqual;': '\u2271',
1307 'NotGreaterFullEqual;': '\u2267\u0338',
1308 'NotGreaterGreater;': '\u226b\u0338',
1309 'NotGreaterLess;': '\u2279',
1310 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1311 'NotGreaterTilde;': '\u2275',
1312 'NotHumpDownHump;': '\u224e\u0338',
1313 'NotHumpEqual;': '\u224f\u0338',
1315 'notindot;': '\u22f5\u0338',
1316 'notinE;': '\u22f9\u0338',
1317 'notinva;': '\u2209',
1318 'notinvb;': '\u22f7',
1319 'notinvc;': '\u22f6',
1320 'NotLeftTriangle;': '\u22ea',
1321 'NotLeftTriangleBar;': '\u29cf\u0338',
1322 'NotLeftTriangleEqual;': '\u22ec',
1323 'NotLess;': '\u226e',
1324 'NotLessEqual;': '\u2270',
1325 'NotLessGreater;': '\u2278',
1326 'NotLessLess;': '\u226a\u0338',
1327 'NotLessSlantEqual;': '\u2a7d\u0338',
1328 'NotLessTilde;': '\u2274',
1329 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1330 'NotNestedLessLess;': '\u2aa1\u0338',
1332 'notniva;': '\u220c',
1333 'notnivb;': '\u22fe',
1334 'notnivc;': '\u22fd',
1335 'NotPrecedes;': '\u2280',
1336 'NotPrecedesEqual;': '\u2aaf\u0338',
1337 'NotPrecedesSlantEqual;': '\u22e0',
1338 'NotReverseElement;': '\u220c',
1339 'NotRightTriangle;': '\u22eb',
1340 'NotRightTriangleBar;': '\u29d0\u0338',
1341 'NotRightTriangleEqual;': '\u22ed',
1342 'NotSquareSubset;': '\u228f\u0338',
1343 'NotSquareSubsetEqual;': '\u22e2',
1344 'NotSquareSuperset;': '\u2290\u0338',
1345 'NotSquareSupersetEqual;': '\u22e3',
1346 'NotSubset;': '\u2282\u20d2',
1347 'NotSubsetEqual;': '\u2288',
1348 'NotSucceeds;': '\u2281',
1349 'NotSucceedsEqual;': '\u2ab0\u0338',
1350 'NotSucceedsSlantEqual;': '\u22e1',
1351 'NotSucceedsTilde;': '\u227f\u0338',
1352 'NotSuperset;': '\u2283\u20d2',
1353 'NotSupersetEqual;': '\u2289',
1354 'NotTilde;': '\u2241',
1355 'NotTildeEqual;': '\u2244',
1356 'NotTildeFullEqual;': '\u2247',
1357 'NotTildeTilde;': '\u2249',
1358 'NotVerticalBar;': '\u2224',
1360 'nparallel;': '\u2226',
1361 'nparsl;': '\u2afd\u20e5',
1362 'npart;': '\u2202\u0338',
1363 'npolint;': '\u2a14',
1365 'nprcue;': '\u22e0',
1366 'npre;': '\u2aaf\u0338',
1368 'npreceq;': '\u2aaf\u0338',
1371 'nrarrc;': '\u2933\u0338',
1372 'nrarrw;': '\u219d\u0338',
1373 'nRightarrow;': '\u21cf',
1374 'nrightarrow;': '\u219b',
1376 'nrtrie;': '\u22ed',
1378 'nsccue;': '\u22e1',
1379 'nsce;': '\u2ab0\u0338',
1380 'Nscr;': '\U0001d4a9',
1381 'nscr;': '\U0001d4c3',
1382 'nshortmid;': '\u2224',
1383 'nshortparallel;': '\u2226',
1386 'nsimeq;': '\u2244',
1389 'nsqsube;': '\u22e2',
1390 'nsqsupe;': '\u22e3',
1392 'nsubE;': '\u2ac5\u0338',
1394 'nsubset;': '\u2282\u20d2',
1395 'nsubseteq;': '\u2288',
1396 'nsubseteqq;': '\u2ac5\u0338',
1398 'nsucceq;': '\u2ab0\u0338',
1400 'nsupE;': '\u2ac6\u0338',
1402 'nsupset;': '\u2283\u20d2',
1403 'nsupseteq;': '\u2289',
1404 'nsupseteqq;': '\u2ac6\u0338',
1411 'ntriangleleft;': '\u22ea',
1412 'ntrianglelefteq;': '\u22ec',
1413 'ntriangleright;': '\u22eb',
1414 'ntrianglerighteq;': '\u22ed',
1418 'numero;': '\u2116',
1420 'nvap;': '\u224d\u20d2',
1421 'nVDash;': '\u22af',
1422 'nVdash;': '\u22ae',
1423 'nvDash;': '\u22ad',
1424 'nvdash;': '\u22ac',
1425 'nvge;': '\u2265\u20d2',
1427 'nvHarr;': '\u2904',
1428 'nvinfin;': '\u29de',
1429 'nvlArr;': '\u2902',
1430 'nvle;': '\u2264\u20d2',
1432 'nvltrie;': '\u22b4\u20d2',
1433 'nvrArr;': '\u2903',
1434 'nvrtrie;': '\u22b5\u20d2',
1435 'nvsim;': '\u223c\u20d2',
1436 'nwarhk;': '\u2923',
1439 'nwarrow;': '\u2196',
1440 'nwnear;': '\u2927',
1454 'Odblac;': '\u0150',
1455 'odblac;': '\u0151',
1458 'odsold;': '\u29bc',
1462 'Ofr;': '\U0001d512',
1463 'ofr;': '\U0001d52c',
1475 'olcross;': '\u29bb',
1482 'Omicron;': '\u039f',
1483 'omicron;': '\u03bf',
1485 'ominus;': '\u2296',
1486 'Oopf;': '\U0001d546',
1487 'oopf;': '\U0001d560',
1489 'OpenCurlyDoubleQuote;': '\u201c',
1490 'OpenCurlyQuote;': '\u2018',
1498 'orderof;': '\u2134',
1503 'origof;': '\u22b6',
1505 'orslope;': '\u2a57',
1508 'Oscr;': '\U0001d4aa',
1519 'Otimes;': '\u2a37',
1520 'otimes;': '\u2297',
1521 'otimesas;': '\u2a36',
1527 'OverBar;': '\u203e',
1528 'OverBrace;': '\u23de',
1529 'OverBracket;': '\u23b4',
1530 'OverParenthesis;': '\u23dc',
1534 'parallel;': '\u2225',
1535 'parsim;': '\u2af3',
1538 'PartialD;': '\u2202',
1543 'permil;': '\u2030',
1545 'pertenk;': '\u2031',
1546 'Pfr;': '\U0001d513',
1547 'pfr;': '\U0001d52d',
1551 'phmmat;': '\u2133',
1555 'pitchfork;': '\u22d4',
1557 'planck;': '\u210f',
1558 'planckh;': '\u210e',
1559 'plankv;': '\u210f',
1561 'plusacir;': '\u2a23',
1563 'pluscir;': '\u2a22',
1564 'plusdo;': '\u2214',
1565 'plusdu;': '\u2a25',
1567 'PlusMinus;': '\xb1',
1570 'plussim;': '\u2a26',
1571 'plustwo;': '\u2a27',
1573 'Poincareplane;': '\u210c',
1574 'pointint;': '\u2a15',
1576 'popf;': '\U0001d561',
1586 'precapprox;': '\u2ab7',
1587 'preccurlyeq;': '\u227c',
1588 'Precedes;': '\u227a',
1589 'PrecedesEqual;': '\u2aaf',
1590 'PrecedesSlantEqual;': '\u227c',
1591 'PrecedesTilde;': '\u227e',
1592 'preceq;': '\u2aaf',
1593 'precnapprox;': '\u2ab9',
1594 'precneqq;': '\u2ab5',
1595 'precnsim;': '\u22e8',
1596 'precsim;': '\u227e',
1599 'primes;': '\u2119',
1602 'prnsim;': '\u22e8',
1604 'Product;': '\u220f',
1605 'profalar;': '\u232e',
1606 'profline;': '\u2312',
1607 'profsurf;': '\u2313',
1609 'Proportion;': '\u2237',
1610 'Proportional;': '\u221d',
1611 'propto;': '\u221d',
1613 'prurel;': '\u22b0',
1614 'Pscr;': '\U0001d4ab',
1615 'pscr;': '\U0001d4c5',
1618 'puncsp;': '\u2008',
1619 'Qfr;': '\U0001d514',
1620 'qfr;': '\U0001d52e',
1623 'qopf;': '\U0001d562',
1624 'qprime;': '\u2057',
1625 'Qscr;': '\U0001d4ac',
1626 'qscr;': '\U0001d4c6',
1627 'quaternions;': '\u210d',
1628 'quatint;': '\u2a16',
1630 'questeq;': '\u225f',
1636 'race;': '\u223d\u0331',
1637 'Racute;': '\u0154',
1638 'racute;': '\u0155',
1640 'raemptyv;': '\u29b3',
1645 'rangle;': '\u27e9',
1651 'rarrap;': '\u2975',
1653 'rarrbfs;': '\u2920',
1655 'rarrfs;': '\u291e',
1656 'rarrhk;': '\u21aa',
1657 'rarrlp;': '\u21ac',
1658 'rarrpl;': '\u2945',
1659 'rarrsim;': '\u2974',
1660 'Rarrtl;': '\u2916',
1661 'rarrtl;': '\u21a3',
1663 'rAtail;': '\u291c',
1664 'ratail;': '\u291a',
1666 'rationals;': '\u211a',
1674 'rbrksld;': '\u298e',
1675 'rbrkslu;': '\u2990',
1676 'Rcaron;': '\u0158',
1677 'rcaron;': '\u0159',
1678 'Rcedil;': '\u0156',
1679 'rcedil;': '\u0157',
1685 'rdldhar;': '\u2969',
1687 'rdquor;': '\u201d',
1691 'realine;': '\u211b',
1692 'realpart;': '\u211c',
1699 'ReverseElement;': '\u220b',
1700 'ReverseEquilibrium;': '\u21cb',
1701 'ReverseUpEquilibrium;': '\u296f',
1702 'rfisht;': '\u297d',
1703 'rfloor;': '\u230b',
1705 'rfr;': '\U0001d52f',
1709 'rharul;': '\u296c',
1713 'RightAngleBracket;': '\u27e9',
1714 'RightArrow;': '\u2192',
1715 'Rightarrow;': '\u21d2',
1716 'rightarrow;': '\u2192',
1717 'RightArrowBar;': '\u21e5',
1718 'RightArrowLeftArrow;': '\u21c4',
1719 'rightarrowtail;': '\u21a3',
1720 'RightCeiling;': '\u2309',
1721 'RightDoubleBracket;': '\u27e7',
1722 'RightDownTeeVector;': '\u295d',
1723 'RightDownVector;': '\u21c2',
1724 'RightDownVectorBar;': '\u2955',
1725 'RightFloor;': '\u230b',
1726 'rightharpoondown;': '\u21c1',
1727 'rightharpoonup;': '\u21c0',
1728 'rightleftarrows;': '\u21c4',
1729 'rightleftharpoons;': '\u21cc',
1730 'rightrightarrows;': '\u21c9',
1731 'rightsquigarrow;': '\u219d',
1732 'RightTee;': '\u22a2',
1733 'RightTeeArrow;': '\u21a6',
1734 'RightTeeVector;': '\u295b',
1735 'rightthreetimes;': '\u22cc',
1736 'RightTriangle;': '\u22b3',
1737 'RightTriangleBar;': '\u29d0',
1738 'RightTriangleEqual;': '\u22b5',
1739 'RightUpDownVector;': '\u294f',
1740 'RightUpTeeVector;': '\u295c',
1741 'RightUpVector;': '\u21be',
1742 'RightUpVectorBar;': '\u2954',
1743 'RightVector;': '\u21c0',
1744 'RightVectorBar;': '\u2953',
1746 'risingdotseq;': '\u2253',
1750 'rmoust;': '\u23b1',
1751 'rmoustache;': '\u23b1',
1758 'ropf;': '\U0001d563',
1759 'roplus;': '\u2a2e',
1760 'rotimes;': '\u2a35',
1761 'RoundImplies;': '\u2970',
1763 'rpargt;': '\u2994',
1764 'rppolint;': '\u2a12',
1766 'Rrightarrow;': '\u21db',
1767 'rsaquo;': '\u203a',
1769 'rscr;': '\U0001d4c7',
1774 'rsquor;': '\u2019',
1775 'rthree;': '\u22cc',
1776 'rtimes;': '\u22ca',
1780 'rtriltri;': '\u29ce',
1781 'RuleDelayed;': '\u29f4',
1782 'ruluhar;': '\u2968',
1784 'Sacute;': '\u015a',
1785 'sacute;': '\u015b',
1790 'Scaron;': '\u0160',
1791 'scaron;': '\u0161',
1795 'Scedil;': '\u015e',
1796 'scedil;': '\u015f',
1801 'scnsim;': '\u22e9',
1802 'scpolint;': '\u2a13',
1809 'searhk;': '\u2925',
1812 'searrow;': '\u2198',
1816 'seswar;': '\u2929',
1817 'setminus;': '\u2216',
1820 'Sfr;': '\U0001d516',
1821 'sfr;': '\U0001d530',
1822 'sfrown;': '\u2322',
1824 'SHCHcy;': '\u0429',
1825 'shchcy;': '\u0449',
1828 'ShortDownArrow;': '\u2193',
1829 'ShortLeftArrow;': '\u2190',
1830 'shortmid;': '\u2223',
1831 'shortparallel;': '\u2225',
1832 'ShortRightArrow;': '\u2192',
1833 'ShortUpArrow;': '\u2191',
1838 'sigmaf;': '\u03c2',
1839 'sigmav;': '\u03c2',
1841 'simdot;': '\u2a6a',
1849 'simplus;': '\u2a24',
1850 'simrarr;': '\u2972',
1852 'SmallCircle;': '\u2218',
1853 'smallsetminus;': '\u2216',
1854 'smashp;': '\u2a33',
1855 'smeparsl;': '\u29e4',
1860 'smtes;': '\u2aac\ufe00',
1861 'SOFTcy;': '\u042c',
1862 'softcy;': '\u044c',
1865 'solbar;': '\u233f',
1866 'Sopf;': '\U0001d54a',
1867 'sopf;': '\U0001d564',
1868 'spades;': '\u2660',
1869 'spadesuit;': '\u2660',
1872 'sqcaps;': '\u2293\ufe00',
1874 'sqcups;': '\u2294\ufe00',
1877 'sqsube;': '\u2291',
1878 'sqsubset;': '\u228f',
1879 'sqsubseteq;': '\u2291',
1881 'sqsupe;': '\u2292',
1882 'sqsupset;': '\u2290',
1883 'sqsupseteq;': '\u2292',
1885 'Square;': '\u25a1',
1886 'square;': '\u25a1',
1887 'SquareIntersection;': '\u2293',
1888 'SquareSubset;': '\u228f',
1889 'SquareSubsetEqual;': '\u2291',
1890 'SquareSuperset;': '\u2290',
1891 'SquareSupersetEqual;': '\u2292',
1892 'SquareUnion;': '\u2294',
1893 'squarf;': '\u25aa',
1896 'Sscr;': '\U0001d4ae',
1897 'sscr;': '\U0001d4c8',
1898 'ssetmn;': '\u2216',
1899 'ssmile;': '\u2323',
1900 'sstarf;': '\u22c6',
1904 'straightepsilon;': '\u03f5',
1905 'straightphi;': '\u03d5',
1909 'subdot;': '\u2abd',
1912 'subedot;': '\u2ac3',
1913 'submult;': '\u2ac1',
1916 'subplus;': '\u2abf',
1917 'subrarr;': '\u2979',
1918 'Subset;': '\u22d0',
1919 'subset;': '\u2282',
1920 'subseteq;': '\u2286',
1921 'subseteqq;': '\u2ac5',
1922 'SubsetEqual;': '\u2286',
1923 'subsetneq;': '\u228a',
1924 'subsetneqq;': '\u2acb',
1925 'subsim;': '\u2ac7',
1926 'subsub;': '\u2ad5',
1927 'subsup;': '\u2ad3',
1929 'succapprox;': '\u2ab8',
1930 'succcurlyeq;': '\u227d',
1931 'Succeeds;': '\u227b',
1932 'SucceedsEqual;': '\u2ab0',
1933 'SucceedsSlantEqual;': '\u227d',
1934 'SucceedsTilde;': '\u227f',
1935 'succeq;': '\u2ab0',
1936 'succnapprox;': '\u2aba',
1937 'succneqq;': '\u2ab6',
1938 'succnsim;': '\u22e9',
1939 'succsim;': '\u227f',
1940 'SuchThat;': '\u220b',
1952 'supdot;': '\u2abe',
1953 'supdsub;': '\u2ad8',
1956 'supedot;': '\u2ac4',
1957 'Superset;': '\u2283',
1958 'SupersetEqual;': '\u2287',
1959 'suphsol;': '\u27c9',
1960 'suphsub;': '\u2ad7',
1961 'suplarr;': '\u297b',
1962 'supmult;': '\u2ac2',
1965 'supplus;': '\u2ac0',
1966 'Supset;': '\u22d1',
1967 'supset;': '\u2283',
1968 'supseteq;': '\u2287',
1969 'supseteqq;': '\u2ac6',
1970 'supsetneq;': '\u228b',
1971 'supsetneqq;': '\u2acc',
1972 'supsim;': '\u2ac8',
1973 'supsub;': '\u2ad4',
1974 'supsup;': '\u2ad6',
1975 'swarhk;': '\u2926',
1978 'swarrow;': '\u2199',
1979 'swnwar;': '\u292a',
1983 'target;': '\u2316',
1987 'Tcaron;': '\u0164',
1988 'tcaron;': '\u0165',
1989 'Tcedil;': '\u0162',
1990 'tcedil;': '\u0163',
1994 'telrec;': '\u2315',
1995 'Tfr;': '\U0001d517',
1996 'tfr;': '\U0001d531',
1997 'there4;': '\u2234',
1998 'Therefore;': '\u2234',
1999 'therefore;': '\u2234',
2002 'thetasym;': '\u03d1',
2003 'thetav;': '\u03d1',
2004 'thickapprox;': '\u2248',
2005 'thicksim;': '\u223c',
2006 'ThickSpace;': '\u205f\u200a',
2007 'thinsp;': '\u2009',
2008 'ThinSpace;': '\u2009',
2010 'thksim;': '\u223c',
2017 'TildeEqual;': '\u2243',
2018 'TildeFullEqual;': '\u2245',
2019 'TildeTilde;': '\u2248',
2022 'timesb;': '\u22a0',
2023 'timesbar;': '\u2a31',
2024 'timesd;': '\u2a30',
2028 'topbot;': '\u2336',
2029 'topcir;': '\u2af1',
2030 'Topf;': '\U0001d54b',
2031 'topf;': '\U0001d565',
2032 'topfork;': '\u2ada',
2034 'tprime;': '\u2034',
2037 'triangle;': '\u25b5',
2038 'triangledown;': '\u25bf',
2039 'triangleleft;': '\u25c3',
2040 'trianglelefteq;': '\u22b4',
2041 'triangleq;': '\u225c',
2042 'triangleright;': '\u25b9',
2043 'trianglerighteq;': '\u22b5',
2044 'tridot;': '\u25ec',
2046 'triminus;': '\u2a3a',
2047 'TripleDot;': '\u20db',
2048 'triplus;': '\u2a39',
2050 'tritime;': '\u2a3b',
2051 'trpezium;': '\u23e2',
2052 'Tscr;': '\U0001d4af',
2053 'tscr;': '\U0001d4c9',
2058 'Tstrok;': '\u0166',
2059 'tstrok;': '\u0167',
2061 'twoheadleftarrow;': '\u219e',
2062 'twoheadrightarrow;': '\u21a0',
2070 'Uarrocir;': '\u2949',
2073 'Ubreve;': '\u016c',
2074 'ubreve;': '\u016d',
2082 'Udblac;': '\u0170',
2083 'udblac;': '\u0171',
2085 'ufisht;': '\u297e',
2086 'Ufr;': '\U0001d518',
2087 'ufr;': '\U0001d532',
2096 'ulcorn;': '\u231c',
2097 'ulcorner;': '\u231c',
2098 'ulcrop;': '\u230f',
2105 'UnderBrace;': '\u23df',
2106 'UnderBracket;': '\u23b5',
2107 'UnderParenthesis;': '\u23dd',
2109 'UnionPlus;': '\u228e',
2112 'Uopf;': '\U0001d54c',
2113 'uopf;': '\U0001d566',
2114 'UpArrow;': '\u2191',
2115 'Uparrow;': '\u21d1',
2116 'uparrow;': '\u2191',
2117 'UpArrowBar;': '\u2912',
2118 'UpArrowDownArrow;': '\u21c5',
2119 'UpDownArrow;': '\u2195',
2120 'Updownarrow;': '\u21d5',
2121 'updownarrow;': '\u2195',
2122 'UpEquilibrium;': '\u296e',
2123 'upharpoonleft;': '\u21bf',
2124 'upharpoonright;': '\u21be',
2126 'UpperLeftArrow;': '\u2196',
2127 'UpperRightArrow;': '\u2197',
2131 'Upsilon;': '\u03a5',
2132 'upsilon;': '\u03c5',
2134 'UpTeeArrow;': '\u21a5',
2135 'upuparrows;': '\u21c8',
2136 'urcorn;': '\u231d',
2137 'urcorner;': '\u231d',
2138 'urcrop;': '\u230e',
2142 'Uscr;': '\U0001d4b0',
2143 'uscr;': '\U0001d4ca',
2145 'Utilde;': '\u0168',
2146 'utilde;': '\u0169',
2154 'uwangle;': '\u29a7',
2155 'vangrt;': '\u299c',
2156 'varepsilon;': '\u03f5',
2157 'varkappa;': '\u03f0',
2158 'varnothing;': '\u2205',
2159 'varphi;': '\u03d5',
2161 'varpropto;': '\u221d',
2164 'varrho;': '\u03f1',
2165 'varsigma;': '\u03c2',
2166 'varsubsetneq;': '\u228a\ufe00',
2167 'varsubsetneqq;': '\u2acb\ufe00',
2168 'varsupsetneq;': '\u228b\ufe00',
2169 'varsupsetneqq;': '\u2acc\ufe00',
2170 'vartheta;': '\u03d1',
2171 'vartriangleleft;': '\u22b2',
2172 'vartriangleright;': '\u22b3',
2182 'Vdashl;': '\u2ae6',
2185 'veebar;': '\u22bb',
2187 'vellip;': '\u22ee',
2188 'Verbar;': '\u2016',
2192 'VerticalBar;': '\u2223',
2193 'VerticalLine;': '|',
2194 'VerticalSeparator;': '\u2758',
2195 'VerticalTilde;': '\u2240',
2196 'VeryThinSpace;': '\u200a',
2197 'Vfr;': '\U0001d519',
2198 'vfr;': '\U0001d533',
2200 'vnsub;': '\u2282\u20d2',
2201 'vnsup;': '\u2283\u20d2',
2202 'Vopf;': '\U0001d54d',
2203 'vopf;': '\U0001d567',
2206 'Vscr;': '\U0001d4b1',
2207 'vscr;': '\U0001d4cb',
2208 'vsubnE;': '\u2acb\ufe00',
2209 'vsubne;': '\u228a\ufe00',
2210 'vsupnE;': '\u2acc\ufe00',
2211 'vsupne;': '\u228b\ufe00',
2212 'Vvdash;': '\u22aa',
2213 'vzigzag;': '\u299a',
2216 'wedbar;': '\u2a5f',
2219 'wedgeq;': '\u2259',
2220 'weierp;': '\u2118',
2221 'Wfr;': '\U0001d51a',
2222 'wfr;': '\U0001d534',
2223 'Wopf;': '\U0001d54e',
2224 'wopf;': '\U0001d568',
2227 'wreath;': '\u2240',
2228 'Wscr;': '\U0001d4b2',
2229 'wscr;': '\U0001d4cc',
2234 'Xfr;': '\U0001d51b',
2235 'xfr;': '\U0001d535',
2245 'Xopf;': '\U0001d54f',
2246 'xopf;': '\U0001d569',
2247 'xoplus;': '\u2a01',
2248 'xotime;': '\u2a02',
2251 'Xscr;': '\U0001d4b3',
2252 'xscr;': '\U0001d4cd',
2253 'xsqcup;': '\u2a06',
2254 'xuplus;': '\u2a04',
2257 'xwedge;': '\u22c0',
2270 'Yfr;': '\U0001d51c',
2271 'yfr;': '\U0001d536',
2274 'Yopf;': '\U0001d550',
2275 'yopf;': '\U0001d56a',
2276 'Yscr;': '\U0001d4b4',
2277 'yscr;': '\U0001d4ce',
2283 'Zacute;': '\u0179',
2284 'zacute;': '\u017a',
2285 'Zcaron;': '\u017d',
2286 'zcaron;': '\u017e',
2291 'zeetrf;': '\u2128',
2292 'ZeroWidthSpace;': '\u200b',
2296 'zfr;': '\U0001d537',
2299 'zigrarr;': '\u21dd',
2301 'zopf;': '\U0001d56b',
2302 'Zscr;': '\U0001d4b5',
2303 'zscr;': '\U0001d4cf',
2309 import http
.client
as compat_http_client
2310 except ImportError: # Python 2
2311 import httplib
as compat_http_client
2314 from urllib
.error
import HTTPError
as compat_HTTPError
2315 except ImportError: # Python 2
2316 from urllib2
import HTTPError
as compat_HTTPError
2319 from urllib
.request
import urlretrieve
as compat_urlretrieve
2320 except ImportError: # Python 2
2321 from urllib
import urlretrieve
as compat_urlretrieve
2324 from html
.parser
import HTMLParser
as compat_HTMLParser
2325 except ImportError: # Python 2
2326 from HTMLParser
import HTMLParser
as compat_HTMLParser
try:  # Python 2
    from HTMLParser import HTMLParseError as compat_HTMLParseError
except ImportError:  # Python <3.4
    try:
        from html.parser import HTMLParseError as compat_HTMLParseError
    except ImportError:  # Python >3.4

        # HTMLParseError has been deprecated in Python 3.3 and removed in
        # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
        # and uniform cross-version exception handling
        class compat_HTMLParseError(Exception):
            """Stand-in used when neither parser module exports HTMLParseError."""
            pass
try:
    from subprocess import DEVNULL

    def compat_subprocess_get_DEVNULL():
        """Return the ``subprocess.DEVNULL`` sentinel."""
        return DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a freshly opened, writable handle on ``os.devnull``.

        Every call opens a new file object; the caller owns it.
        """
        return open(os.path.devnull, 'w')
2348 import http
.server
as compat_http_server
2350 import BaseHTTPServer
as compat_http_server
# Text type of the running interpreter: ``unicode`` where that builtin
# exists, otherwise ``str``.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                else re.compile(r'([\x00-\x7f]+)'))

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            # Nothing was percent-encoded
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                # Not a valid %xx escape: keep the text verbatim
                append(b'%')
                append(item)
        return b''.join(res)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Attribute access doubles as a "is this string-like?" check
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # Odd indexes hold the ASCII runs that may contain escapes, even
        # indexes hold the text between them
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but also replace plus signs by spaces, as required for
        unquoting HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        string = string.replace('+', ' ')
        return compat_urllib_parse_unquote(string, encoding, errors)
try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
except ImportError:  # Python 2
    # Python 2 will choke in urlencode on mixture of byte and unicode strings.
    # Possible solutions are to either port it from python 3 with all
    # the friends or manually ensure input query contains only byte strings.
    # We will stick with latter thus recursively encoding the whole query.
    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
        """URL-encode *query* after encoding every text element with *encoding*.

        Dicts, lists and tuples are walked recursively; any other value is
        passed through untouched.
        """
        def encode_elem(e):
            if isinstance(e, dict):
                e = encode_dict(e)
            elif isinstance(e, (list, tuple,)):
                list_e = encode_list(e)
                # Keep the container type of the input
                e = tuple(list_e) if isinstance(e, tuple) else list_e
            elif isinstance(e, compat_str):
                e = e.encode(encoding)
            return e

        def encode_dict(d):
            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())

        def encode_list(items):
            return [encode_elem(e) for e in items]

        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """urllib handler that serves ``data:`` URLs from the URL text itself."""

        def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                # Drop the trailing ';base64' marker
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# Base string type: ``basestring`` where that builtin exists, else ``str``.
try:
    compat_basestring = basestring  # Python 2
except NameError:
    compat_basestring = str

# Code point -> one-character text: ``unichr`` where it exists, else ``chr``.
try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr
2498 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2499 except ImportError: # Python 2.6
2500 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
etree = xml.etree.ElementTree


class _TreeBuilder(etree.TreeBuilder):
    """TreeBuilder whose ``doctype`` callback does nothing."""

    def doctype(self, name, pubid, system):
        pass
try:
    # xml.etree.ElementTree.Element is a method in Python <=2.6 and
    # the following will crash with:
    #  TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
    isinstance(None, xml.etree.ElementTree.Element)
    from xml.etree.ElementTree import Element as compat_etree_Element
except TypeError:  # Python <=2.6
    from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
if sys.version_info[0] >= 3:
    def compat_etree_fromstring(text):
        """Parse *text* as XML and return the root element."""
        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield every descendant of *root*, depth first."""
            for el in root.findall('*'):
                yield el
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        if not parser:
            parser = etree.XMLParser(target=_TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Build an element and decode any byte-string attribute value."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse *text* as XML and return the root with text nodes decoded."""
        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
if hasattr(etree, 'register_namespace'):
    compat_etree_register_namespace = etree.register_namespace
else:
    def compat_etree_register_namespace(prefix, uri):
        """Register a namespace prefix.
        The registry is global, and any existing mapping for either the
        given prefix or the namespace URI will be removed.
        *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
        attributes in this namespace will be serialized with prefix if possible.
        ValueError is raised if prefix is reserved or is invalid.
        """
        if re.match(r"ns\d+$", prefix):
            raise ValueError("Prefix format reserved for internal use")
        # Iterate over a snapshot: entries are deleted inside the loop
        for k, v in list(etree._namespace_map.items()):
            if k == uri or v == prefix:
                del etree._namespace_map[k]
        etree._namespace_map[uri] = prefix
if sys.version_info < (2, 7):
    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
    # .//node does not match if a node is a direct child of . !
    def compat_xpath(xpath):
        """Return *xpath* as an ASCII byte string when it is text."""
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')
        return xpath
else:
    def compat_xpath(xpath):
        """Return *xpath* unchanged."""
        return xpath
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string *qs* into a list of ``(name, value)`` pairs."""
        qs, _coerce_result = qs, compat_str
        # Both '&' and ';' separate fields
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError('bad query field: %r' % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = compat_urllib_parse_unquote(
                    name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = compat_urllib_parse_unquote(
                    value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to a list of values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
# When ``os.name`` is 'java', use ``os._name`` instead.
compat_os_name = os._name if os.name == 'java' else os.name
if compat_os_name == 'nt':
    def compat_shlex_quote(s):
        """Return *s* as-is if it only has safe characters, else double-quoted."""
        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
    try:
        from shlex import quote as compat_shlex_quote
    except ImportError:  # Python < 3.3
        def compat_shlex_quote(s):
            """Return *s* as-is if it only has safe characters, else single-quoted."""
            if re.match(r'^[-_\w./]+$', s):
                return s
            else:
                return "'" + s.replace("'", "'\"'\"'") + "'"
try:
    # Probe: shlex.split must hand non-ASCII text back unchanged as text
    args = shlex.split('äøę')
    assert (isinstance(args, list)
            and isinstance(args[0], compat_str)
            and args[0] == 'äøę')
    compat_shlex_split = shlex.split
except (AssertionError, UnicodeEncodeError):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """Split *s* like ``shlex.split`` and return a list of text tokens."""
        if isinstance(s, compat_str):
            s = s.encode('utf-8')
        return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser

    def compat_setenv(key, value, env=os.environ):
        """Store *value* under *key* in the mapping *env*."""
        env[key] = value
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return the variable *key* decoded with the filesystem encoding."""
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    def compat_setenv(key, value, env=os.environ):
        """Store *value* under *key* in *env*, encoding text on both sides."""
        def encode(v):
            from .utils import get_filesystem_encoding
            return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
        env[encode(key)] = encode(value)

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if compat_os_name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare '~': current user's home
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # '~user': look the user up in the password database
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif compat_os_name in ('nt', 'ce'):
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                try:
                    drive = compat_getenv('HOMEDRIVE')
                except KeyError:
                    drive = ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
if compat_os_name == 'nt' and sys.version_info < (3, 8):
    # os.path.realpath on Windows does not follow symbolic links
    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
    def compat_realpath(path):
        """Follow *path* through symbolic links and return the final target."""
        # NOTE(review): os.path.abspath resolves a relative link target
        # against the current directory, not the link's own directory.
        while os.path.islink(path):
            path = os.path.abspath(os.readlink(path))
        return path
else:
    compat_realpath = os.path.realpath
if sys.version_info < (3, 0):
    def compat_print(s):
        """Print *s* encoded with the preferred encoding.

        Characters that cannot be encoded are written as XML character
        references.
        """
        from .utils import preferredencoding
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        """Print the text string *s*."""
        assert isinstance(s, compat_str)
        print(s)
if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        """Call ``getpass.getpass`` with a text *prompt* encoded to bytes first."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
# Line-reading prompt function: ``raw_input`` where it exists, else ``input``.
try:
    compat_input = raw_input
except NameError:  # Python 3
    compat_input = input
# Python < 2.6.5 require kwargs to be bytes
try:
    def _testfunc(x):
        pass
    # Probe: raises TypeError where text keyword names are rejected
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* whose keys are converted with ``bytes``."""
        return dict((bytes(k), v) for k, v in kwargs.items())
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged."""
        return kwargs
# Tuples for isinstance() checks; ``long`` is included where it exists.
try:
    compat_numeric_types = (int, float, long, complex)
except NameError:  # Python 3
    compat_numeric_types = (int, float, complex)


try:
    compat_integer_types = (int, long)
except NameError:  # Python 3
    compat_integer_types = (int, )
if sys.version_info < (2, 7):
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to ``(host, port)`` *address* and return the socket.

        Each address returned by ``socket.getaddrinfo`` is tried in order.
        Raises the last ``socket.error`` seen, or one reporting an empty
        address list if there was nothing to try.
        """
        host, port = address
        err = None
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sa)
                return sock
            except socket.error as e:
                err = e
                # Do not leak the half-open socket before trying the next one
                if sock is not None:
                    sock.close()
        if err is not None:
            raise err
        else:
            raise socket.error('getaddrinfo returns an empty list')
else:
    compat_socket_create_connection = socket.create_connection
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch ``optparse.OptionGroup.add_option`` if it rejects text options.

    A throwaway parser is probed first. The patch is installed only when
    the probe raises TypeError, and it converts text arguments to ASCII
    byte strings before delegating to the real method.
    """
    op = optparse.OptionParser()
    og = optparse.OptionGroup(op, 'foo')
    try:
        og.add_option('-t')
    except TypeError:
        real_add_option = optparse.OptionGroup.add_option

        def _compat_add_option(self, *args, **kwargs):
            def enc(v):
                return (
                    v.encode('ascii', 'replace') if isinstance(v, compat_str)
                    else v)
            bargs = [enc(a) for a in args]
            bkwargs = dict(
                (k, enc(v)) for k, v in kwargs.items())
            return real_add_option(self, *bargs, **bkwargs)
        optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        COLUMNS and LINES from the environment are used first. A dimension
        that is missing or not positive is taken from ``stty size``, and
        from *fallback* if that command fails.
        """
        columns = compat_getenv('COLUMNS')
        if columns:
            columns = int(columns)
        else:
            columns = None
        lines = compat_getenv('LINES')
        if lines:
            lines = int(lines)
        else:
            lines = None

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<lines> <columns>"
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
try:
    # Probe: raises TypeError where count() does not accept these keywords
    itertools.count(start=0, step=1)
    compat_itertools_count = itertools.count
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield ``start``, ``start + step``, ``start + 2 * step``, ... forever."""
        n = start
        while True:
            yield n
            n += step
2906 if sys
.version_info
>= (3, 0):
2907 from tokenize
import tokenize
as compat_tokenize_tokenize
2909 from tokenize
import generate_tokens
as compat_tokenize_tokenize
try:
    # Probe: raises TypeError where struct rejects a text format string
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
    # See https://bugs.python.org/issue19099
    def compat_struct_pack(spec, *args):
        """``struct.pack`` that also accepts a text format string."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def compat_struct_unpack(spec, *args):
        """``struct.unpack`` that also accepts a text format string."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)

    class compat_Struct(struct.Struct):
        """``struct.Struct`` that also accepts a text format string."""

        def __init__(self, fmt):
            if isinstance(fmt, compat_str):
                fmt = fmt.encode('ascii')
            super(compat_Struct, self).__init__(fmt)
else:
    compat_struct_pack = struct.pack
    compat_struct_unpack = struct.unpack
    if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
        class compat_Struct(struct.Struct):
            """``struct.Struct`` whose ``unpack`` wraps its input in ``buffer``."""

            def unpack(self, string):
                if not isinstance(string, buffer):  # noqa: F821
                    string = buffer(string)  # noqa: F821
                return super(compat_Struct, self).unpack(string)
    else:
        compat_Struct = struct.Struct
2946 from future_builtins
import zip as compat_zip
2947 except ImportError: # not 2.6+ or is 3.x
2949 from itertools
import izip
as compat_zip
# < 2.5 or 3.x
if sys.version_info < (3, 3):
    def compat_b64decode(s, *args, **kwargs):
        """``base64.b64decode`` that also accepts an ASCII text string."""
        if isinstance(s, compat_str):
            s = s.encode('ascii')
        return base64.b64decode(s, *args, **kwargs)
else:
    compat_b64decode = base64.b64decode
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
    # names, see the original PyPy issue [1] and the youtube-dl one [2].
    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
    # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Wrap ``ctypes.WINFUNCTYPE`` so the function name goes through ``str``."""
        real = ctypes.WINFUNCTYPE(*args, **kwargs)

        def resf(tpl, *args, **kwargs):
            funcname, dll = tpl
            return real((str(funcname), dll), *args, **kwargs)

        return resf
else:
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Forward all arguments to ``ctypes.WINFUNCTYPE``."""
        return ctypes.WINFUNCTYPE(*args, **kwargs)
2982 'compat_HTMLParseError',
2983 'compat_HTMLParser',
2987 'compat_basestring',
2991 'compat_ctypes_WINFUNCTYPE',
2992 'compat_etree_Element',
2993 'compat_etree_fromstring',
2994 'compat_etree_register_namespace',
2995 'compat_expanduser',
2996 'compat_get_terminal_size',
2999 'compat_html_entities',
3000 'compat_html_entities_html5',
3001 'compat_http_client',
3002 'compat_http_server',
3004 'compat_integer_types',
3005 'compat_itertools_count',
3007 'compat_numeric_types',
3014 'compat_shlex_quote',
3015 'compat_shlex_split',
3016 'compat_socket_create_connection',
3018 'compat_struct_pack',
3019 'compat_struct_unpack',
3020 'compat_subprocess_get_DEVNULL',
3021 'compat_tokenize_tokenize',
3022 'compat_urllib_error',
3023 'compat_urllib_parse',
3024 'compat_urllib_parse_unquote',
3025 'compat_urllib_parse_unquote_plus',
3026 'compat_urllib_parse_unquote_to_bytes',
3027 'compat_urllib_parse_urlencode',
3028 'compat_urllib_parse_urlparse',
3029 'compat_urllib_request',
3030 'compat_urllib_request_DataHandler',
3031 'compat_urllib_response',
3033 'compat_urlretrieve',
3034 'compat_xml_parse_error',
3037 'workaround_optparse_bug9161',