2 from __future__
import unicode_literals
22 import xml
.etree
.ElementTree
26 import urllib
.request
as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2
as compat_urllib_request
31 import urllib
.error
as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2
as compat_urllib_error
36 import urllib
.parse
as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib
as compat_urllib_parse
41 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse
import urlparse
as compat_urllib_parse_urlparse
46 import urllib
.parse
as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse
as compat_urlparse
51 import urllib
.response
as compat_urllib_response
52 except ImportError: # Python 2
53 import urllib
as compat_urllib_response
56 import http
.cookiejar
as compat_cookiejar
57 except ImportError: # Python 2
58 import cookielib
as compat_cookiejar
61 import http
.cookies
as compat_cookies
62 except ImportError: # Python 2
63 import Cookie
as compat_cookies
66 import html
.entities
as compat_html_entities
67 except ImportError: # Python 2
68 import htmlentitydefs
as compat_html_entities
71 compat_html_entities_html5
= compat_html_entities
.html5
72 except AttributeError:
73 # Copied from CPython 3.5.1 html/entities.py
74 compat_html_entities_html5
= {
83 'acE;': '\u223e\u0333',
103 'alefsym;': '\u2135',
118 'andslope;': '\u2a58',
124 'angmsdaa;': '\u29a8',
125 'angmsdab;': '\u29a9',
126 'angmsdac;': '\u29aa',
127 'angmsdad;': '\u29ab',
128 'angmsdae;': '\u29ac',
129 'angmsdaf;': '\u29ad',
130 'angmsdag;': '\u29ae',
131 'angmsdah;': '\u29af',
133 'angrtvb;': '\u22be',
134 'angrtvbd;': '\u299d',
137 'angzarr;': '\u237c',
140 'Aopf;': '\U0001d538',
141 'aopf;': '\U0001d552',
148 'ApplyFunction;': '\u2061',
150 'approxeq;': '\u224a',
155 'Ascr;': '\U0001d49c',
156 'ascr;': '\U0001d4b6',
160 'asympeq;': '\u224d',
169 'awconint;': '\u2233',
171 'backcong;': '\u224c',
172 'backepsilon;': '\u03f6',
173 'backprime;': '\u2035',
174 'backsim;': '\u223d',
175 'backsimeq;': '\u22cd',
176 'Backslash;': '\u2216',
181 'barwedge;': '\u2305',
183 'bbrktbrk;': '\u23b6',
189 'Because;': '\u2235',
190 'because;': '\u2235',
191 'bemptyv;': '\u29b0',
194 'Bernoullis;': '\u212c',
198 'between;': '\u226c',
199 'Bfr;': '\U0001d505',
200 'bfr;': '\U0001d51f',
202 'bigcirc;': '\u25ef',
204 'bigodot;': '\u2a00',
205 'bigoplus;': '\u2a01',
206 'bigotimes;': '\u2a02',
207 'bigsqcup;': '\u2a06',
208 'bigstar;': '\u2605',
209 'bigtriangledown;': '\u25bd',
210 'bigtriangleup;': '\u25b3',
211 'biguplus;': '\u2a04',
213 'bigwedge;': '\u22c0',
215 'blacklozenge;': '\u29eb',
216 'blacksquare;': '\u25aa',
217 'blacktriangle;': '\u25b4',
218 'blacktriangledown;': '\u25be',
219 'blacktriangleleft;': '\u25c2',
220 'blacktriangleright;': '\u25b8',
227 'bnequiv;': '\u2261\u20e5',
230 'Bopf;': '\U0001d539',
231 'bopf;': '\U0001d553',
254 'boxminus;': '\u229f',
255 'boxplus;': '\u229e',
256 'boxtimes;': '\u22a0',
285 'bscr;': '\U0001d4b7',
291 'bsolhsub;': '\u27c8',
304 'capbrcup;': '\u2a49',
308 'CapitalDifferentialD;': '\u2145',
309 'caps;': '\u2229\ufe00',
312 'Cayleys;': '\u212d',
322 'Cconint;': '\u2230',
324 'ccupssm;': '\u2a50',
330 'cemptyv;': '\u29b2',
333 'CenterDot;': '\xb7',
334 'centerdot;': '\xb7',
336 'cfr;': '\U0001d520',
340 'checkmark;': '\u2713',
346 'circlearrowleft;': '\u21ba',
347 'circlearrowright;': '\u21bb',
348 'circledast;': '\u229b',
349 'circledcirc;': '\u229a',
350 'circleddash;': '\u229d',
351 'CircleDot;': '\u2299',
353 'circledS;': '\u24c8',
354 'CircleMinus;': '\u2296',
355 'CirclePlus;': '\u2295',
356 'CircleTimes;': '\u2297',
359 'cirfnint;': '\u2a10',
361 'cirscir;': '\u29c2',
362 'ClockwiseContourIntegral;': '\u2232',
363 'CloseCurlyDoubleQuote;': '\u201d',
364 'CloseCurlyQuote;': '\u2019',
366 'clubsuit;': '\u2663',
371 'coloneq;': '\u2254',
376 'complement;': '\u2201',
377 'complexes;': '\u2102',
379 'congdot;': '\u2a6d',
380 'Congruent;': '\u2261',
383 'ContourIntegral;': '\u222e',
385 'copf;': '\U0001d554',
387 'Coproduct;': '\u2210',
393 'CounterClockwiseContourIntegral;': '\u2233',
397 'Cscr;': '\U0001d49e',
398 'cscr;': '\U0001d4b8',
404 'cudarrl;': '\u2938',
405 'cudarrr;': '\u2935',
409 'cularrp;': '\u293d',
412 'cupbrcap;': '\u2a48',
418 'cups;': '\u222a\ufe00',
420 'curarrm;': '\u293c',
421 'curlyeqprec;': '\u22de',
422 'curlyeqsucc;': '\u22df',
423 'curlyvee;': '\u22ce',
424 'curlywedge;': '\u22cf',
427 'curvearrowleft;': '\u21b6',
428 'curvearrowright;': '\u21b7',
431 'cwconint;': '\u2232',
443 'dbkarow;': '\u290f',
451 'ddagger;': '\u2021',
453 'DDotrahd;': '\u2911',
454 'ddotseq;': '\u2a77',
460 'demptyv;': '\u29b1',
462 'Dfr;': '\U0001d507',
463 'dfr;': '\U0001d521',
467 'DiacriticalAcute;': '\xb4',
468 'DiacriticalDot;': '\u02d9',
469 'DiacriticalDoubleAcute;': '\u02dd',
470 'DiacriticalGrave;': '`',
471 'DiacriticalTilde;': '\u02dc',
473 'Diamond;': '\u22c4',
474 'diamond;': '\u22c4',
475 'diamondsuit;': '\u2666',
478 'DifferentialD;': '\u2146',
479 'digamma;': '\u03dd',
484 'divideontimes;': '\u22c7',
491 'Dopf;': '\U0001d53b',
492 'dopf;': '\U0001d555',
497 'doteqdot;': '\u2251',
498 'DotEqual;': '\u2250',
499 'dotminus;': '\u2238',
500 'dotplus;': '\u2214',
501 'dotsquare;': '\u22a1',
502 'doublebarwedge;': '\u2306',
503 'DoubleContourIntegral;': '\u222f',
504 'DoubleDot;': '\xa8',
505 'DoubleDownArrow;': '\u21d3',
506 'DoubleLeftArrow;': '\u21d0',
507 'DoubleLeftRightArrow;': '\u21d4',
508 'DoubleLeftTee;': '\u2ae4',
509 'DoubleLongLeftArrow;': '\u27f8',
510 'DoubleLongLeftRightArrow;': '\u27fa',
511 'DoubleLongRightArrow;': '\u27f9',
512 'DoubleRightArrow;': '\u21d2',
513 'DoubleRightTee;': '\u22a8',
514 'DoubleUpArrow;': '\u21d1',
515 'DoubleUpDownArrow;': '\u21d5',
516 'DoubleVerticalBar;': '\u2225',
517 'DownArrow;': '\u2193',
518 'Downarrow;': '\u21d3',
519 'downarrow;': '\u2193',
520 'DownArrowBar;': '\u2913',
521 'DownArrowUpArrow;': '\u21f5',
522 'DownBreve;': '\u0311',
523 'downdownarrows;': '\u21ca',
524 'downharpoonleft;': '\u21c3',
525 'downharpoonright;': '\u21c2',
526 'DownLeftRightVector;': '\u2950',
527 'DownLeftTeeVector;': '\u295e',
528 'DownLeftVector;': '\u21bd',
529 'DownLeftVectorBar;': '\u2956',
530 'DownRightTeeVector;': '\u295f',
531 'DownRightVector;': '\u21c1',
532 'DownRightVectorBar;': '\u2957',
533 'DownTee;': '\u22a4',
534 'DownTeeArrow;': '\u21a7',
535 'drbkarow;': '\u2910',
538 'Dscr;': '\U0001d49f',
539 'dscr;': '\U0001d4b9',
550 'dwangle;': '\u29a6',
553 'dzigrarr;': '\u27ff',
575 'Efr;': '\U0001d508',
576 'efr;': '\U0001d522',
585 'Element;': '\u2208',
586 'elinters;': '\u23e7',
593 'emptyset;': '\u2205',
594 'EmptySmallSquare;': '\u25fb',
596 'EmptyVerySmallSquare;': '\u25ab',
605 'Eopf;': '\U0001d53c',
606 'eopf;': '\U0001d556',
611 'Epsilon;': '\u0395',
612 'epsilon;': '\u03b5',
615 'eqcolon;': '\u2255',
617 'eqslantgtr;': '\u2a96',
618 'eqslantless;': '\u2a95',
621 'EqualTilde;': '\u2242',
623 'Equilibrium;': '\u21cc',
625 'equivDD;': '\u2a78',
626 'eqvparsl;': '\u29e5',
648 'expectation;': '\u2130',
649 'ExponentialE;': '\u2147',
650 'exponentiale;': '\u2147',
651 'fallingdotseq;': '\u2252',
658 'Ffr;': '\U0001d509',
659 'ffr;': '\U0001d523',
661 'FilledSmallSquare;': '\u25fc',
662 'FilledVerySmallSquare;': '\u25aa',
668 'Fopf;': '\U0001d53d',
669 'fopf;': '\U0001d557',
674 'Fouriertrf;': '\u2131',
675 'fpartint;': '\u2a0d',
697 'fscr;': '\U0001d4bb',
719 'geqslant;': '\u2a7e',
723 'gesdoto;': '\u2a82',
724 'gesdotol;': '\u2a84',
725 'gesl;': '\u22db\ufe00',
727 'Gfr;': '\U0001d50a',
728 'gfr;': '\U0001d524',
740 'gnapprox;': '\u2a8a',
746 'Gopf;': '\U0001d53e',
747 'gopf;': '\U0001d558',
749 'GreaterEqual;': '\u2265',
750 'GreaterEqualLess;': '\u22db',
751 'GreaterFullEqual;': '\u2267',
752 'GreaterGreater;': '\u2aa2',
753 'GreaterLess;': '\u2277',
754 'GreaterSlantEqual;': '\u2a7e',
755 'GreaterTilde;': '\u2273',
756 'Gscr;': '\U0001d4a2',
770 'gtquest;': '\u2a7c',
771 'gtrapprox;': '\u2a86',
774 'gtreqless;': '\u22db',
775 'gtreqqless;': '\u2a8c',
776 'gtrless;': '\u2277',
778 'gvertneqq;': '\u2269\ufe00',
779 'gvnE;': '\u2269\ufe00',
788 'harrcir;': '\u2948',
795 'heartsuit;': '\u2665',
799 'hfr;': '\U0001d525',
800 'HilbertSpace;': '\u210b',
801 'hksearow;': '\u2925',
802 'hkswarow;': '\u2926',
805 'hookleftarrow;': '\u21a9',
806 'hookrightarrow;': '\u21aa',
808 'hopf;': '\U0001d559',
810 'HorizontalLine;': '\u2500',
812 'hscr;': '\U0001d4bd',
816 'HumpDownHump;': '\u224e',
817 'HumpEqual;': '\u224f',
838 'ifr;': '\U0001d526',
854 'ImaginaryI;': '\u2148',
855 'imagline;': '\u2110',
856 'imagpart;': '\u2111',
860 'Implies;': '\u21d2',
864 'infintie;': '\u29dd',
869 'integers;': '\u2124',
870 'Integral;': '\u222b',
871 'intercal;': '\u22ba',
872 'Intersection;': '\u22c2',
873 'intlarhk;': '\u2a17',
874 'intprod;': '\u2a3c',
875 'InvisibleComma;': '\u2063',
876 'InvisibleTimes;': '\u2062',
881 'Iopf;': '\U0001d540',
882 'iopf;': '\U0001d55a',
889 'iscr;': '\U0001d4be',
891 'isindot;': '\u22f5',
909 'Jfr;': '\U0001d50d',
910 'jfr;': '\U0001d527',
912 'Jopf;': '\U0001d541',
913 'jopf;': '\U0001d55b',
914 'Jscr;': '\U0001d4a5',
915 'jscr;': '\U0001d4bf',
927 'Kfr;': '\U0001d50e',
928 'kfr;': '\U0001d528',
934 'Kopf;': '\U0001d542',
935 'kopf;': '\U0001d55c',
936 'Kscr;': '\U0001d4a6',
937 'kscr;': '\U0001d4c0',
941 'laemptyv;': '\u29b4',
950 'Laplacetrf;': '\u2112',
957 'larrbfs;': '\u291f',
962 'larrsim;': '\u2973',
968 'lates;': '\u2aad\ufe00',
975 'lbrksld;': '\u298f',
976 'lbrkslu;': '\u298d',
988 'ldrdhar;': '\u2967',
989 'ldrushar;': '\u294b',
993 'LeftAngleBracket;': '\u27e8',
994 'LeftArrow;': '\u2190',
995 'Leftarrow;': '\u21d0',
996 'leftarrow;': '\u2190',
997 'LeftArrowBar;': '\u21e4',
998 'LeftArrowRightArrow;': '\u21c6',
999 'leftarrowtail;': '\u21a2',
1000 'LeftCeiling;': '\u2308',
1001 'LeftDoubleBracket;': '\u27e6',
1002 'LeftDownTeeVector;': '\u2961',
1003 'LeftDownVector;': '\u21c3',
1004 'LeftDownVectorBar;': '\u2959',
1005 'LeftFloor;': '\u230a',
1006 'leftharpoondown;': '\u21bd',
1007 'leftharpoonup;': '\u21bc',
1008 'leftleftarrows;': '\u21c7',
1009 'LeftRightArrow;': '\u2194',
1010 'Leftrightarrow;': '\u21d4',
1011 'leftrightarrow;': '\u2194',
1012 'leftrightarrows;': '\u21c6',
1013 'leftrightharpoons;': '\u21cb',
1014 'leftrightsquigarrow;': '\u21ad',
1015 'LeftRightVector;': '\u294e',
1016 'LeftTee;': '\u22a3',
1017 'LeftTeeArrow;': '\u21a4',
1018 'LeftTeeVector;': '\u295a',
1019 'leftthreetimes;': '\u22cb',
1020 'LeftTriangle;': '\u22b2',
1021 'LeftTriangleBar;': '\u29cf',
1022 'LeftTriangleEqual;': '\u22b4',
1023 'LeftUpDownVector;': '\u2951',
1024 'LeftUpTeeVector;': '\u2960',
1025 'LeftUpVector;': '\u21bf',
1026 'LeftUpVectorBar;': '\u2958',
1027 'LeftVector;': '\u21bc',
1028 'LeftVectorBar;': '\u2952',
1033 'leqslant;': '\u2a7d',
1036 'lesdot;': '\u2a7f',
1037 'lesdoto;': '\u2a81',
1038 'lesdotor;': '\u2a83',
1039 'lesg;': '\u22da\ufe00',
1040 'lesges;': '\u2a93',
1041 'lessapprox;': '\u2a85',
1042 'lessdot;': '\u22d6',
1043 'lesseqgtr;': '\u22da',
1044 'lesseqqgtr;': '\u2a8b',
1045 'LessEqualGreater;': '\u22da',
1046 'LessFullEqual;': '\u2266',
1047 'LessGreater;': '\u2276',
1048 'lessgtr;': '\u2276',
1049 'LessLess;': '\u2aa1',
1050 'lesssim;': '\u2272',
1051 'LessSlantEqual;': '\u2a7d',
1052 'LessTilde;': '\u2272',
1053 'lfisht;': '\u297c',
1054 'lfloor;': '\u230a',
1055 'Lfr;': '\U0001d50f',
1056 'lfr;': '\U0001d529',
1062 'lharul;': '\u296a',
1069 'llcorner;': '\u231e',
1070 'Lleftarrow;': '\u21da',
1071 'llhard;': '\u296b',
1073 'Lmidot;': '\u013f',
1074 'lmidot;': '\u0140',
1075 'lmoust;': '\u23b0',
1076 'lmoustache;': '\u23b0',
1078 'lnapprox;': '\u2a89',
1087 'LongLeftArrow;': '\u27f5',
1088 'Longleftarrow;': '\u27f8',
1089 'longleftarrow;': '\u27f5',
1090 'LongLeftRightArrow;': '\u27f7',
1091 'Longleftrightarrow;': '\u27fa',
1092 'longleftrightarrow;': '\u27f7',
1093 'longmapsto;': '\u27fc',
1094 'LongRightArrow;': '\u27f6',
1095 'Longrightarrow;': '\u27f9',
1096 'longrightarrow;': '\u27f6',
1097 'looparrowleft;': '\u21ab',
1098 'looparrowright;': '\u21ac',
1100 'Lopf;': '\U0001d543',
1101 'lopf;': '\U0001d55d',
1102 'loplus;': '\u2a2d',
1103 'lotimes;': '\u2a34',
1104 'lowast;': '\u2217',
1106 'LowerLeftArrow;': '\u2199',
1107 'LowerRightArrow;': '\u2198',
1109 'lozenge;': '\u25ca',
1112 'lparlt;': '\u2993',
1114 'lrcorner;': '\u231f',
1116 'lrhard;': '\u296d',
1119 'lsaquo;': '\u2039',
1121 'lscr;': '\U0001d4c1',
1129 'lsquor;': '\u201a',
1130 'Lstrok;': '\u0141',
1131 'lstrok;': '\u0142',
1140 'lthree;': '\u22cb',
1141 'ltimes;': '\u22c9',
1142 'ltlarr;': '\u2976',
1143 'ltquest;': '\u2a7b',
1147 'ltrPar;': '\u2996',
1148 'lurdshar;': '\u294a',
1149 'luruhar;': '\u2966',
1150 'lvertneqq;': '\u2268\ufe00',
1151 'lvnE;': '\u2268\ufe00',
1156 'maltese;': '\u2720',
1159 'mapsto;': '\u21a6',
1160 'mapstodown;': '\u21a7',
1161 'mapstoleft;': '\u21a4',
1162 'mapstoup;': '\u21a5',
1163 'marker;': '\u25ae',
1164 'mcomma;': '\u2a29',
1169 'measuredangle;': '\u2221',
1170 'MediumSpace;': '\u205f',
1171 'Mellintrf;': '\u2133',
1172 'Mfr;': '\U0001d510',
1173 'mfr;': '\U0001d52a',
1179 'midcir;': '\u2af0',
1183 'minusb;': '\u229f',
1184 'minusd;': '\u2238',
1185 'minusdu;': '\u2a2a',
1186 'MinusPlus;': '\u2213',
1189 'mnplus;': '\u2213',
1190 'models;': '\u22a7',
1191 'Mopf;': '\U0001d544',
1192 'mopf;': '\U0001d55e',
1195 'mscr;': '\U0001d4c2',
1196 'mstpos;': '\u223e',
1199 'multimap;': '\u22b8',
1202 'Nacute;': '\u0143',
1203 'nacute;': '\u0144',
1204 'nang;': '\u2220\u20d2',
1206 'napE;': '\u2a70\u0338',
1207 'napid;': '\u224b\u0338',
1209 'napprox;': '\u2249',
1211 'natural;': '\u266e',
1212 'naturals;': '\u2115',
1215 'nbump;': '\u224e\u0338',
1216 'nbumpe;': '\u224f\u0338',
1218 'Ncaron;': '\u0147',
1219 'ncaron;': '\u0148',
1220 'Ncedil;': '\u0145',
1221 'ncedil;': '\u0146',
1223 'ncongdot;': '\u2a6d\u0338',
1229 'nearhk;': '\u2924',
1232 'nearrow;': '\u2197',
1233 'nedot;': '\u2250\u0338',
1234 'NegativeMediumSpace;': '\u200b',
1235 'NegativeThickSpace;': '\u200b',
1236 'NegativeThinSpace;': '\u200b',
1237 'NegativeVeryThinSpace;': '\u200b',
1238 'nequiv;': '\u2262',
1239 'nesear;': '\u2928',
1240 'nesim;': '\u2242\u0338',
1241 'NestedGreaterGreater;': '\u226b',
1242 'NestedLessLess;': '\u226a',
1244 'nexist;': '\u2204',
1245 'nexists;': '\u2204',
1246 'Nfr;': '\U0001d511',
1247 'nfr;': '\U0001d52b',
1248 'ngE;': '\u2267\u0338',
1251 'ngeqq;': '\u2267\u0338',
1252 'ngeqslant;': '\u2a7e\u0338',
1253 'nges;': '\u2a7e\u0338',
1254 'nGg;': '\u22d9\u0338',
1256 'nGt;': '\u226b\u20d2',
1259 'nGtv;': '\u226b\u0338',
1272 'nlE;': '\u2266\u0338',
1274 'nLeftarrow;': '\u21cd',
1275 'nleftarrow;': '\u219a',
1276 'nLeftrightarrow;': '\u21ce',
1277 'nleftrightarrow;': '\u21ae',
1279 'nleqq;': '\u2266\u0338',
1280 'nleqslant;': '\u2a7d\u0338',
1281 'nles;': '\u2a7d\u0338',
1283 'nLl;': '\u22d8\u0338',
1285 'nLt;': '\u226a\u20d2',
1288 'nltrie;': '\u22ec',
1289 'nLtv;': '\u226a\u0338',
1291 'NoBreak;': '\u2060',
1292 'NonBreakingSpace;': '\xa0',
1294 'nopf;': '\U0001d55f',
1298 'NotCongruent;': '\u2262',
1299 'NotCupCap;': '\u226d',
1300 'NotDoubleVerticalBar;': '\u2226',
1301 'NotElement;': '\u2209',
1302 'NotEqual;': '\u2260',
1303 'NotEqualTilde;': '\u2242\u0338',
1304 'NotExists;': '\u2204',
1305 'NotGreater;': '\u226f',
1306 'NotGreaterEqual;': '\u2271',
1307 'NotGreaterFullEqual;': '\u2267\u0338',
1308 'NotGreaterGreater;': '\u226b\u0338',
1309 'NotGreaterLess;': '\u2279',
1310 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1311 'NotGreaterTilde;': '\u2275',
1312 'NotHumpDownHump;': '\u224e\u0338',
1313 'NotHumpEqual;': '\u224f\u0338',
1315 'notindot;': '\u22f5\u0338',
1316 'notinE;': '\u22f9\u0338',
1317 'notinva;': '\u2209',
1318 'notinvb;': '\u22f7',
1319 'notinvc;': '\u22f6',
1320 'NotLeftTriangle;': '\u22ea',
1321 'NotLeftTriangleBar;': '\u29cf\u0338',
1322 'NotLeftTriangleEqual;': '\u22ec',
1323 'NotLess;': '\u226e',
1324 'NotLessEqual;': '\u2270',
1325 'NotLessGreater;': '\u2278',
1326 'NotLessLess;': '\u226a\u0338',
1327 'NotLessSlantEqual;': '\u2a7d\u0338',
1328 'NotLessTilde;': '\u2274',
1329 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1330 'NotNestedLessLess;': '\u2aa1\u0338',
1332 'notniva;': '\u220c',
1333 'notnivb;': '\u22fe',
1334 'notnivc;': '\u22fd',
1335 'NotPrecedes;': '\u2280',
1336 'NotPrecedesEqual;': '\u2aaf\u0338',
1337 'NotPrecedesSlantEqual;': '\u22e0',
1338 'NotReverseElement;': '\u220c',
1339 'NotRightTriangle;': '\u22eb',
1340 'NotRightTriangleBar;': '\u29d0\u0338',
1341 'NotRightTriangleEqual;': '\u22ed',
1342 'NotSquareSubset;': '\u228f\u0338',
1343 'NotSquareSubsetEqual;': '\u22e2',
1344 'NotSquareSuperset;': '\u2290\u0338',
1345 'NotSquareSupersetEqual;': '\u22e3',
1346 'NotSubset;': '\u2282\u20d2',
1347 'NotSubsetEqual;': '\u2288',
1348 'NotSucceeds;': '\u2281',
1349 'NotSucceedsEqual;': '\u2ab0\u0338',
1350 'NotSucceedsSlantEqual;': '\u22e1',
1351 'NotSucceedsTilde;': '\u227f\u0338',
1352 'NotSuperset;': '\u2283\u20d2',
1353 'NotSupersetEqual;': '\u2289',
1354 'NotTilde;': '\u2241',
1355 'NotTildeEqual;': '\u2244',
1356 'NotTildeFullEqual;': '\u2247',
1357 'NotTildeTilde;': '\u2249',
1358 'NotVerticalBar;': '\u2224',
1360 'nparallel;': '\u2226',
1361 'nparsl;': '\u2afd\u20e5',
1362 'npart;': '\u2202\u0338',
1363 'npolint;': '\u2a14',
1365 'nprcue;': '\u22e0',
1366 'npre;': '\u2aaf\u0338',
1368 'npreceq;': '\u2aaf\u0338',
1371 'nrarrc;': '\u2933\u0338',
1372 'nrarrw;': '\u219d\u0338',
1373 'nRightarrow;': '\u21cf',
1374 'nrightarrow;': '\u219b',
1376 'nrtrie;': '\u22ed',
1378 'nsccue;': '\u22e1',
1379 'nsce;': '\u2ab0\u0338',
1380 'Nscr;': '\U0001d4a9',
1381 'nscr;': '\U0001d4c3',
1382 'nshortmid;': '\u2224',
1383 'nshortparallel;': '\u2226',
1386 'nsimeq;': '\u2244',
1389 'nsqsube;': '\u22e2',
1390 'nsqsupe;': '\u22e3',
1392 'nsubE;': '\u2ac5\u0338',
1394 'nsubset;': '\u2282\u20d2',
1395 'nsubseteq;': '\u2288',
1396 'nsubseteqq;': '\u2ac5\u0338',
1398 'nsucceq;': '\u2ab0\u0338',
1400 'nsupE;': '\u2ac6\u0338',
1402 'nsupset;': '\u2283\u20d2',
1403 'nsupseteq;': '\u2289',
1404 'nsupseteqq;': '\u2ac6\u0338',
1411 'ntriangleleft;': '\u22ea',
1412 'ntrianglelefteq;': '\u22ec',
1413 'ntriangleright;': '\u22eb',
1414 'ntrianglerighteq;': '\u22ed',
1418 'numero;': '\u2116',
1420 'nvap;': '\u224d\u20d2',
1421 'nVDash;': '\u22af',
1422 'nVdash;': '\u22ae',
1423 'nvDash;': '\u22ad',
1424 'nvdash;': '\u22ac',
1425 'nvge;': '\u2265\u20d2',
1427 'nvHarr;': '\u2904',
1428 'nvinfin;': '\u29de',
1429 'nvlArr;': '\u2902',
1430 'nvle;': '\u2264\u20d2',
1432 'nvltrie;': '\u22b4\u20d2',
1433 'nvrArr;': '\u2903',
1434 'nvrtrie;': '\u22b5\u20d2',
1435 'nvsim;': '\u223c\u20d2',
1436 'nwarhk;': '\u2923',
1439 'nwarrow;': '\u2196',
1440 'nwnear;': '\u2927',
1454 'Odblac;': '\u0150',
1455 'odblac;': '\u0151',
1458 'odsold;': '\u29bc',
1462 'Ofr;': '\U0001d512',
1463 'ofr;': '\U0001d52c',
1475 'olcross;': '\u29bb',
1482 'Omicron;': '\u039f',
1483 'omicron;': '\u03bf',
1485 'ominus;': '\u2296',
1486 'Oopf;': '\U0001d546',
1487 'oopf;': '\U0001d560',
1489 'OpenCurlyDoubleQuote;': '\u201c',
1490 'OpenCurlyQuote;': '\u2018',
1498 'orderof;': '\u2134',
1503 'origof;': '\u22b6',
1505 'orslope;': '\u2a57',
1508 'Oscr;': '\U0001d4aa',
1519 'Otimes;': '\u2a37',
1520 'otimes;': '\u2297',
1521 'otimesas;': '\u2a36',
1527 'OverBar;': '\u203e',
1528 'OverBrace;': '\u23de',
1529 'OverBracket;': '\u23b4',
1530 'OverParenthesis;': '\u23dc',
1534 'parallel;': '\u2225',
1535 'parsim;': '\u2af3',
1538 'PartialD;': '\u2202',
1543 'permil;': '\u2030',
1545 'pertenk;': '\u2031',
1546 'Pfr;': '\U0001d513',
1547 'pfr;': '\U0001d52d',
1551 'phmmat;': '\u2133',
1555 'pitchfork;': '\u22d4',
1557 'planck;': '\u210f',
1558 'planckh;': '\u210e',
1559 'plankv;': '\u210f',
1561 'plusacir;': '\u2a23',
1563 'pluscir;': '\u2a22',
1564 'plusdo;': '\u2214',
1565 'plusdu;': '\u2a25',
1567 'PlusMinus;': '\xb1',
1570 'plussim;': '\u2a26',
1571 'plustwo;': '\u2a27',
1573 'Poincareplane;': '\u210c',
1574 'pointint;': '\u2a15',
1576 'popf;': '\U0001d561',
1586 'precapprox;': '\u2ab7',
1587 'preccurlyeq;': '\u227c',
1588 'Precedes;': '\u227a',
1589 'PrecedesEqual;': '\u2aaf',
1590 'PrecedesSlantEqual;': '\u227c',
1591 'PrecedesTilde;': '\u227e',
1592 'preceq;': '\u2aaf',
1593 'precnapprox;': '\u2ab9',
1594 'precneqq;': '\u2ab5',
1595 'precnsim;': '\u22e8',
1596 'precsim;': '\u227e',
1599 'primes;': '\u2119',
1602 'prnsim;': '\u22e8',
1604 'Product;': '\u220f',
1605 'profalar;': '\u232e',
1606 'profline;': '\u2312',
1607 'profsurf;': '\u2313',
1609 'Proportion;': '\u2237',
1610 'Proportional;': '\u221d',
1611 'propto;': '\u221d',
1613 'prurel;': '\u22b0',
1614 'Pscr;': '\U0001d4ab',
1615 'pscr;': '\U0001d4c5',
1618 'puncsp;': '\u2008',
1619 'Qfr;': '\U0001d514',
1620 'qfr;': '\U0001d52e',
1623 'qopf;': '\U0001d562',
1624 'qprime;': '\u2057',
1625 'Qscr;': '\U0001d4ac',
1626 'qscr;': '\U0001d4c6',
1627 'quaternions;': '\u210d',
1628 'quatint;': '\u2a16',
1630 'questeq;': '\u225f',
1636 'race;': '\u223d\u0331',
1637 'Racute;': '\u0154',
1638 'racute;': '\u0155',
1640 'raemptyv;': '\u29b3',
1645 'rangle;': '\u27e9',
1651 'rarrap;': '\u2975',
1653 'rarrbfs;': '\u2920',
1655 'rarrfs;': '\u291e',
1656 'rarrhk;': '\u21aa',
1657 'rarrlp;': '\u21ac',
1658 'rarrpl;': '\u2945',
1659 'rarrsim;': '\u2974',
1660 'Rarrtl;': '\u2916',
1661 'rarrtl;': '\u21a3',
1663 'rAtail;': '\u291c',
1664 'ratail;': '\u291a',
1666 'rationals;': '\u211a',
1674 'rbrksld;': '\u298e',
1675 'rbrkslu;': '\u2990',
1676 'Rcaron;': '\u0158',
1677 'rcaron;': '\u0159',
1678 'Rcedil;': '\u0156',
1679 'rcedil;': '\u0157',
1685 'rdldhar;': '\u2969',
1687 'rdquor;': '\u201d',
1691 'realine;': '\u211b',
1692 'realpart;': '\u211c',
1699 'ReverseElement;': '\u220b',
1700 'ReverseEquilibrium;': '\u21cb',
1701 'ReverseUpEquilibrium;': '\u296f',
1702 'rfisht;': '\u297d',
1703 'rfloor;': '\u230b',
1705 'rfr;': '\U0001d52f',
1709 'rharul;': '\u296c',
1713 'RightAngleBracket;': '\u27e9',
1714 'RightArrow;': '\u2192',
1715 'Rightarrow;': '\u21d2',
1716 'rightarrow;': '\u2192',
1717 'RightArrowBar;': '\u21e5',
1718 'RightArrowLeftArrow;': '\u21c4',
1719 'rightarrowtail;': '\u21a3',
1720 'RightCeiling;': '\u2309',
1721 'RightDoubleBracket;': '\u27e7',
1722 'RightDownTeeVector;': '\u295d',
1723 'RightDownVector;': '\u21c2',
1724 'RightDownVectorBar;': '\u2955',
1725 'RightFloor;': '\u230b',
1726 'rightharpoondown;': '\u21c1',
1727 'rightharpoonup;': '\u21c0',
1728 'rightleftarrows;': '\u21c4',
1729 'rightleftharpoons;': '\u21cc',
1730 'rightrightarrows;': '\u21c9',
1731 'rightsquigarrow;': '\u219d',
1732 'RightTee;': '\u22a2',
1733 'RightTeeArrow;': '\u21a6',
1734 'RightTeeVector;': '\u295b',
1735 'rightthreetimes;': '\u22cc',
1736 'RightTriangle;': '\u22b3',
1737 'RightTriangleBar;': '\u29d0',
1738 'RightTriangleEqual;': '\u22b5',
1739 'RightUpDownVector;': '\u294f',
1740 'RightUpTeeVector;': '\u295c',
1741 'RightUpVector;': '\u21be',
1742 'RightUpVectorBar;': '\u2954',
1743 'RightVector;': '\u21c0',
1744 'RightVectorBar;': '\u2953',
1746 'risingdotseq;': '\u2253',
1750 'rmoust;': '\u23b1',
1751 'rmoustache;': '\u23b1',
1758 'ropf;': '\U0001d563',
1759 'roplus;': '\u2a2e',
1760 'rotimes;': '\u2a35',
1761 'RoundImplies;': '\u2970',
1763 'rpargt;': '\u2994',
1764 'rppolint;': '\u2a12',
1766 'Rrightarrow;': '\u21db',
1767 'rsaquo;': '\u203a',
1769 'rscr;': '\U0001d4c7',
1774 'rsquor;': '\u2019',
1775 'rthree;': '\u22cc',
1776 'rtimes;': '\u22ca',
1780 'rtriltri;': '\u29ce',
1781 'RuleDelayed;': '\u29f4',
1782 'ruluhar;': '\u2968',
1784 'Sacute;': '\u015a',
1785 'sacute;': '\u015b',
1790 'Scaron;': '\u0160',
1791 'scaron;': '\u0161',
1795 'Scedil;': '\u015e',
1796 'scedil;': '\u015f',
1801 'scnsim;': '\u22e9',
1802 'scpolint;': '\u2a13',
1809 'searhk;': '\u2925',
1812 'searrow;': '\u2198',
1816 'seswar;': '\u2929',
1817 'setminus;': '\u2216',
1820 'Sfr;': '\U0001d516',
1821 'sfr;': '\U0001d530',
1822 'sfrown;': '\u2322',
1824 'SHCHcy;': '\u0429',
1825 'shchcy;': '\u0449',
1828 'ShortDownArrow;': '\u2193',
1829 'ShortLeftArrow;': '\u2190',
1830 'shortmid;': '\u2223',
1831 'shortparallel;': '\u2225',
1832 'ShortRightArrow;': '\u2192',
1833 'ShortUpArrow;': '\u2191',
1838 'sigmaf;': '\u03c2',
1839 'sigmav;': '\u03c2',
1841 'simdot;': '\u2a6a',
1849 'simplus;': '\u2a24',
1850 'simrarr;': '\u2972',
1852 'SmallCircle;': '\u2218',
1853 'smallsetminus;': '\u2216',
1854 'smashp;': '\u2a33',
1855 'smeparsl;': '\u29e4',
1860 'smtes;': '\u2aac\ufe00',
1861 'SOFTcy;': '\u042c',
1862 'softcy;': '\u044c',
1865 'solbar;': '\u233f',
1866 'Sopf;': '\U0001d54a',
1867 'sopf;': '\U0001d564',
1868 'spades;': '\u2660',
1869 'spadesuit;': '\u2660',
1872 'sqcaps;': '\u2293\ufe00',
1874 'sqcups;': '\u2294\ufe00',
1877 'sqsube;': '\u2291',
1878 'sqsubset;': '\u228f',
1879 'sqsubseteq;': '\u2291',
1881 'sqsupe;': '\u2292',
1882 'sqsupset;': '\u2290',
1883 'sqsupseteq;': '\u2292',
1885 'Square;': '\u25a1',
1886 'square;': '\u25a1',
1887 'SquareIntersection;': '\u2293',
1888 'SquareSubset;': '\u228f',
1889 'SquareSubsetEqual;': '\u2291',
1890 'SquareSuperset;': '\u2290',
1891 'SquareSupersetEqual;': '\u2292',
1892 'SquareUnion;': '\u2294',
1893 'squarf;': '\u25aa',
1896 'Sscr;': '\U0001d4ae',
1897 'sscr;': '\U0001d4c8',
1898 'ssetmn;': '\u2216',
1899 'ssmile;': '\u2323',
1900 'sstarf;': '\u22c6',
1904 'straightepsilon;': '\u03f5',
1905 'straightphi;': '\u03d5',
1909 'subdot;': '\u2abd',
1912 'subedot;': '\u2ac3',
1913 'submult;': '\u2ac1',
1916 'subplus;': '\u2abf',
1917 'subrarr;': '\u2979',
1918 'Subset;': '\u22d0',
1919 'subset;': '\u2282',
1920 'subseteq;': '\u2286',
1921 'subseteqq;': '\u2ac5',
1922 'SubsetEqual;': '\u2286',
1923 'subsetneq;': '\u228a',
1924 'subsetneqq;': '\u2acb',
1925 'subsim;': '\u2ac7',
1926 'subsub;': '\u2ad5',
1927 'subsup;': '\u2ad3',
1929 'succapprox;': '\u2ab8',
1930 'succcurlyeq;': '\u227d',
1931 'Succeeds;': '\u227b',
1932 'SucceedsEqual;': '\u2ab0',
1933 'SucceedsSlantEqual;': '\u227d',
1934 'SucceedsTilde;': '\u227f',
1935 'succeq;': '\u2ab0',
1936 'succnapprox;': '\u2aba',
1937 'succneqq;': '\u2ab6',
1938 'succnsim;': '\u22e9',
1939 'succsim;': '\u227f',
1940 'SuchThat;': '\u220b',
1952 'supdot;': '\u2abe',
1953 'supdsub;': '\u2ad8',
1956 'supedot;': '\u2ac4',
1957 'Superset;': '\u2283',
1958 'SupersetEqual;': '\u2287',
1959 'suphsol;': '\u27c9',
1960 'suphsub;': '\u2ad7',
1961 'suplarr;': '\u297b',
1962 'supmult;': '\u2ac2',
1965 'supplus;': '\u2ac0',
1966 'Supset;': '\u22d1',
1967 'supset;': '\u2283',
1968 'supseteq;': '\u2287',
1969 'supseteqq;': '\u2ac6',
1970 'supsetneq;': '\u228b',
1971 'supsetneqq;': '\u2acc',
1972 'supsim;': '\u2ac8',
1973 'supsub;': '\u2ad4',
1974 'supsup;': '\u2ad6',
1975 'swarhk;': '\u2926',
1978 'swarrow;': '\u2199',
1979 'swnwar;': '\u292a',
1983 'target;': '\u2316',
1987 'Tcaron;': '\u0164',
1988 'tcaron;': '\u0165',
1989 'Tcedil;': '\u0162',
1990 'tcedil;': '\u0163',
1994 'telrec;': '\u2315',
1995 'Tfr;': '\U0001d517',
1996 'tfr;': '\U0001d531',
1997 'there4;': '\u2234',
1998 'Therefore;': '\u2234',
1999 'therefore;': '\u2234',
2002 'thetasym;': '\u03d1',
2003 'thetav;': '\u03d1',
2004 'thickapprox;': '\u2248',
2005 'thicksim;': '\u223c',
2006 'ThickSpace;': '\u205f\u200a',
2007 'thinsp;': '\u2009',
2008 'ThinSpace;': '\u2009',
2010 'thksim;': '\u223c',
2017 'TildeEqual;': '\u2243',
2018 'TildeFullEqual;': '\u2245',
2019 'TildeTilde;': '\u2248',
2022 'timesb;': '\u22a0',
2023 'timesbar;': '\u2a31',
2024 'timesd;': '\u2a30',
2028 'topbot;': '\u2336',
2029 'topcir;': '\u2af1',
2030 'Topf;': '\U0001d54b',
2031 'topf;': '\U0001d565',
2032 'topfork;': '\u2ada',
2034 'tprime;': '\u2034',
2037 'triangle;': '\u25b5',
2038 'triangledown;': '\u25bf',
2039 'triangleleft;': '\u25c3',
2040 'trianglelefteq;': '\u22b4',
2041 'triangleq;': '\u225c',
2042 'triangleright;': '\u25b9',
2043 'trianglerighteq;': '\u22b5',
2044 'tridot;': '\u25ec',
2046 'triminus;': '\u2a3a',
2047 'TripleDot;': '\u20db',
2048 'triplus;': '\u2a39',
2050 'tritime;': '\u2a3b',
2051 'trpezium;': '\u23e2',
2052 'Tscr;': '\U0001d4af',
2053 'tscr;': '\U0001d4c9',
2058 'Tstrok;': '\u0166',
2059 'tstrok;': '\u0167',
2061 'twoheadleftarrow;': '\u219e',
2062 'twoheadrightarrow;': '\u21a0',
2070 'Uarrocir;': '\u2949',
2073 'Ubreve;': '\u016c',
2074 'ubreve;': '\u016d',
2082 'Udblac;': '\u0170',
2083 'udblac;': '\u0171',
2085 'ufisht;': '\u297e',
2086 'Ufr;': '\U0001d518',
2087 'ufr;': '\U0001d532',
2096 'ulcorn;': '\u231c',
2097 'ulcorner;': '\u231c',
2098 'ulcrop;': '\u230f',
2105 'UnderBrace;': '\u23df',
2106 'UnderBracket;': '\u23b5',
2107 'UnderParenthesis;': '\u23dd',
2109 'UnionPlus;': '\u228e',
2112 'Uopf;': '\U0001d54c',
2113 'uopf;': '\U0001d566',
2114 'UpArrow;': '\u2191',
2115 'Uparrow;': '\u21d1',
2116 'uparrow;': '\u2191',
2117 'UpArrowBar;': '\u2912',
2118 'UpArrowDownArrow;': '\u21c5',
2119 'UpDownArrow;': '\u2195',
2120 'Updownarrow;': '\u21d5',
2121 'updownarrow;': '\u2195',
2122 'UpEquilibrium;': '\u296e',
2123 'upharpoonleft;': '\u21bf',
2124 'upharpoonright;': '\u21be',
2126 'UpperLeftArrow;': '\u2196',
2127 'UpperRightArrow;': '\u2197',
2131 'Upsilon;': '\u03a5',
2132 'upsilon;': '\u03c5',
2134 'UpTeeArrow;': '\u21a5',
2135 'upuparrows;': '\u21c8',
2136 'urcorn;': '\u231d',
2137 'urcorner;': '\u231d',
2138 'urcrop;': '\u230e',
2142 'Uscr;': '\U0001d4b0',
2143 'uscr;': '\U0001d4ca',
2145 'Utilde;': '\u0168',
2146 'utilde;': '\u0169',
2154 'uwangle;': '\u29a7',
2155 'vangrt;': '\u299c',
2156 'varepsilon;': '\u03f5',
2157 'varkappa;': '\u03f0',
2158 'varnothing;': '\u2205',
2159 'varphi;': '\u03d5',
2161 'varpropto;': '\u221d',
2164 'varrho;': '\u03f1',
2165 'varsigma;': '\u03c2',
2166 'varsubsetneq;': '\u228a\ufe00',
2167 'varsubsetneqq;': '\u2acb\ufe00',
2168 'varsupsetneq;': '\u228b\ufe00',
2169 'varsupsetneqq;': '\u2acc\ufe00',
2170 'vartheta;': '\u03d1',
2171 'vartriangleleft;': '\u22b2',
2172 'vartriangleright;': '\u22b3',
2182 'Vdashl;': '\u2ae6',
2185 'veebar;': '\u22bb',
2187 'vellip;': '\u22ee',
2188 'Verbar;': '\u2016',
2192 'VerticalBar;': '\u2223',
2193 'VerticalLine;': '|',
2194 'VerticalSeparator;': '\u2758',
2195 'VerticalTilde;': '\u2240',
2196 'VeryThinSpace;': '\u200a',
2197 'Vfr;': '\U0001d519',
2198 'vfr;': '\U0001d533',
2200 'vnsub;': '\u2282\u20d2',
2201 'vnsup;': '\u2283\u20d2',
2202 'Vopf;': '\U0001d54d',
2203 'vopf;': '\U0001d567',
2206 'Vscr;': '\U0001d4b1',
2207 'vscr;': '\U0001d4cb',
2208 'vsubnE;': '\u2acb\ufe00',
2209 'vsubne;': '\u228a\ufe00',
2210 'vsupnE;': '\u2acc\ufe00',
2211 'vsupne;': '\u228b\ufe00',
2212 'Vvdash;': '\u22aa',
2213 'vzigzag;': '\u299a',
2216 'wedbar;': '\u2a5f',
2219 'wedgeq;': '\u2259',
2220 'weierp;': '\u2118',
2221 'Wfr;': '\U0001d51a',
2222 'wfr;': '\U0001d534',
2223 'Wopf;': '\U0001d54e',
2224 'wopf;': '\U0001d568',
2227 'wreath;': '\u2240',
2228 'Wscr;': '\U0001d4b2',
2229 'wscr;': '\U0001d4cc',
2234 'Xfr;': '\U0001d51b',
2235 'xfr;': '\U0001d535',
2245 'Xopf;': '\U0001d54f',
2246 'xopf;': '\U0001d569',
2247 'xoplus;': '\u2a01',
2248 'xotime;': '\u2a02',
2251 'Xscr;': '\U0001d4b3',
2252 'xscr;': '\U0001d4cd',
2253 'xsqcup;': '\u2a06',
2254 'xuplus;': '\u2a04',
2257 'xwedge;': '\u22c0',
2270 'Yfr;': '\U0001d51c',
2271 'yfr;': '\U0001d536',
2274 'Yopf;': '\U0001d550',
2275 'yopf;': '\U0001d56a',
2276 'Yscr;': '\U0001d4b4',
2277 'yscr;': '\U0001d4ce',
2283 'Zacute;': '\u0179',
2284 'zacute;': '\u017a',
2285 'Zcaron;': '\u017d',
2286 'zcaron;': '\u017e',
2291 'zeetrf;': '\u2128',
2292 'ZeroWidthSpace;': '\u200b',
2296 'zfr;': '\U0001d537',
2299 'zigrarr;': '\u21dd',
2301 'zopf;': '\U0001d56b',
2302 'Zscr;': '\U0001d4b5',
2303 'zscr;': '\U0001d4cf',
2309 import http
.client
as compat_http_client
2310 except ImportError: # Python 2
2311 import httplib
as compat_http_client
2314 from urllib
.error
import HTTPError
as compat_HTTPError
2315 except ImportError: # Python 2
2316 from urllib2
import HTTPError
as compat_HTTPError
2319 from urllib
.request
import urlretrieve
as compat_urlretrieve
2320 except ImportError: # Python 2
2321 from urllib
import urlretrieve
as compat_urlretrieve
2324 from html
.parser
import HTMLParser
as compat_HTMLParser
2325 except ImportError: # Python 2
2326 from HTMLParser
import HTMLParser
as compat_HTMLParser
2329 from HTMLParser
import HTMLParseError
as compat_HTMLParseError
2330 except ImportError: # Python <3.4
2332 from html
.parser
import HTMLParseError
as compat_HTMLParseError
2333 except ImportError: # Python >3.4
2335 # HTMLParseError has been deprecated in Python 3.3 and removed in
2336 # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
2337 # and uniform cross-version exception handling
2338 class compat_HTMLParseError(Exception):
2342 from subprocess
import DEVNULL
2343 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2345 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2348 import http
.server
as compat_http_server
2350 import BaseHTTPServer
as compat_http_server
2353 compat_str
= unicode # Python 2
2358 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2359 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2360 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2361 except ImportError: # Python 2
2362 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2363 else re
.compile(r
'([\x00-\x7f]+)'))
2365 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2366 # implementations from cpython 3.4.3's stdlib. Python 2's version
2367 # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
2369 def compat_urllib_parse_unquote_to_bytes(string
):
2370 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2371 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2372 # unescaped non-ASCII characters, which URIs should not.
2374 # Is it a string-like object?
2377 if isinstance(string
, compat_str
):
2378 string
= string
.encode('utf-8')
2379 bits
= string
.split(b
'%')
2384 for item
in bits
[1:]:
2386 append(compat_urllib_parse
._hextochr
[item
[:2]])
2391 return b
''.join(res
)
2393 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2394 """Replace %xx escapes by their single-character equivalent. The optional
2395 encoding and errors parameters specify how to decode percent-encoded
2396 sequences into Unicode characters, as accepted by the bytes.decode()
2398 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2399 sequences are replaced by a placeholder character.
2401 unquote('abc%20def') -> 'abc def'.
2403 if '%' not in string
:
2406 if encoding
is None:
2410 bits
= _asciire
.split(string
)
2413 for i
in range(1, len(bits
), 2):
2414 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Unquote an HTML form value: plus signs become spaces, then %xx
    escapes are decoded by compat_urllib_parse_unquote().

    *encoding* and *errors* are passed through to
    compat_urllib_parse_unquote() unchanged.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # The '+' substitution happens on the still-quoted text, i.e. before any
    # percent escape is decoded.
    return compat_urllib_parse_unquote(string.replace('+', ' '), encoding, errors)
2428 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2429 except ImportError: # Python 2
2430 # Python 2 will choke in urlencode on mixture of byte and unicode strings.
2431 # Possible solutions are to either port it from python 3 with all
2432 # the friends or manually ensure input query contains only byte strings.
2433 # We will stick with latter thus recursively encoding the whole query.
2434 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2436 if isinstance(e
, dict):
2438 elif isinstance(e
, (list, tuple,)):
2439 list_e
= encode_list(e
)
2440 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2441 elif isinstance(e
, compat_str
):
2442 e
= e
.encode(encoding
)
2446 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2449 return [encode_elem(e
) for e
in l
]
2451 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2454 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2455 except ImportError: # Python < 3.4
2456 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2457 class compat_urllib_request_DataHandler(compat_urllib_request
.BaseHandler
):
2458 def data_open(self
, req
):
2459 # data URLs as specified in RFC 2397.
2461 # ignores POSTed data
2464 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2465 # mediatype := [ type "/" subtype ] *( ";" parameter )
2467 # parameter := attribute "=" value
2468 url
= req
.get_full_url()
2470 scheme
, data
= url
.split(':', 1)
2471 mediatype
, data
= data
.split(',', 1)
2473 # even base64 encoded data URLs might be quoted so unquote in any case:
2474 data
= compat_urllib_parse_unquote_to_bytes(data
)
2475 if mediatype
.endswith(';base64'):
2476 data
= binascii
.a2b_base64(data
)
2477 mediatype
= mediatype
[:-7]
2480 mediatype
= 'text/plain;charset=US-ASCII'
2482 headers
= email
.message_from_string(
2483 'Content-type: %s\nContent-length: %d\n' % (mediatype
, len(data
)))
2485 return compat_urllib_response
.addinfourl(io
.BytesIO(data
), headers
, url
)
2488 compat_basestring
= basestring
# Python 2
2490 compat_basestring
= str
2493 compat_chr
= unichr # Python 2
2498 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2499 except ImportError: # Python 2.6
2500 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2503 etree
= xml
.etree
.ElementTree
2506 class _TreeBuilder(etree
.TreeBuilder
):
2507 def doctype(self
, name
, pubid
, system
):
2512 # xml.etree.ElementTree.Element is a method in Python <=2.6 and
2513 # the following will crash with:
2514 # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
2515 isinstance(None, xml
.etree
.ElementTree
.Element
)
2516 from xml
.etree
.ElementTree
import Element
as compat_etree_Element
2517 except TypeError: # Python <=2.6
2518 from xml
.etree
.ElementTree
import _ElementInterface
as compat_etree_Element
2520 if sys
.version_info
[0] >= 3:
def compat_etree_fromstring(text):
    """Parse the XML document in *text* and return what etree.XML() returns.

    The parser is an etree.XMLParser whose target is a fresh _TreeBuilder
    instance.
    """
    builder = _TreeBuilder()
    xml_parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=xml_parser)
2524 # python 2.x tries to encode unicode strings with ascii (see the
2525 # XMLParser._fixtext method)
2527 _etree_iter
= etree
.Element
.iter
2528 except AttributeError: # Python <=2.6
2529 def _etree_iter(root
):
2530 for el
in root
.findall('*'):
2532 for sub
in _etree_iter(el
):
2535 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2537 def _XML(text
, parser
=None):
2539 parser
= etree
.XMLParser(target
=_TreeBuilder())
2541 return parser
.close()
2543 def _element_factory(*args
, **kwargs
):
2544 el
= etree
.Element(*args
, **kwargs
)
2545 for k
, v
in el
.items():
2546 if isinstance(v
, bytes):
2547 el
.set(k
, v
.decode('utf-8'))
2550 def compat_etree_fromstring(text
):
2551 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2552 for el
in _etree_iter(doc
):
2553 if el
.text
is not None and isinstance(el
.text
, bytes):
2554 el
.text
= el
.text
.decode('utf-8')
2557 if hasattr(etree
, 'register_namespace'):
2558 compat_etree_register_namespace
= etree
.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Register *prefix* as the namespace prefix for *uri*.

    The registry (etree._namespace_map) is global. Any existing entry that
    uses either the given URI or the given prefix is dropped before the new
    mapping is stored.

    Raises ValueError if *prefix* has the form ``ns<digits>``, which is
    reserved for internal use.
    """
    if re.match(r"ns\d+$", prefix) is not None:
        raise ValueError("Prefix format reserved for internal use")
    registry = etree._namespace_map
    # Collect the conflicting keys first so the mapping is never mutated
    # while it is being iterated.
    stale_uris = [
        known_uri for known_uri, known_prefix in registry.items()
        if known_uri == uri or known_prefix == prefix]
    for known_uri in stale_uris:
        del registry[known_uri]
    registry[uri] = prefix
2575 if sys
.version_info
< (2, 7):
2576 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2577 # .//node does not match if a node is a direct child of . !
2578 def compat_xpath(xpath
):
2579 if isinstance(xpath
, compat_str
):
2580 xpath
= xpath
.encode('ascii')
2583 compat_xpath
= lambda xpath
: xpath
2586 from urllib
.parse
import parse_qs
as compat_parse_qs
2587 except ImportError: # Python 2
2588 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2589 # Python 2's version is apparently totally broken
2591 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2592 encoding
='utf-8', errors
='replace'):
2593 qs
, _coerce_result
= qs
, compat_str
2594 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2596 for name_value
in pairs
:
2597 if not name_value
and not strict_parsing
:
2599 nv
= name_value
.split('=', 1)
2602 raise ValueError('bad query field: %r' % (name_value
,))
2603 # Handle case of a control-name with no equal sign
2604 if keep_blank_values
:
2608 if len(nv
[1]) or keep_blank_values
:
2609 name
= nv
[0].replace('+', ' ')
2610 name
= compat_urllib_parse_unquote(
2611 name
, encoding
=encoding
, errors
=errors
)
2612 name
= _coerce_result(name
)
2613 value
= nv
[1].replace('+', ' ')
2614 value
= compat_urllib_parse_unquote(
2615 value
, encoding
=encoding
, errors
=errors
)
2616 value
= _coerce_result(value
)
2617 r
.append((name
, value
))
2620 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
2621 encoding
='utf-8', errors
='replace'):
2623 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
2624 encoding
=encoding
, errors
=errors
)
2625 for name
, value
in pairs
:
2626 if name
in parsed_result
:
2627 parsed_result
[name
].append(value
)
2629 parsed_result
[name
] = [value
]
2630 return parsed_result
2633 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2636 if compat_os_name
== 'nt':
def compat_shlex_quote(s):
    """Quote *s* as a single command-line argument (variant used when
    compat_os_name is 'nt').

    A string made only of word characters, '-', '_', '.' and '/' is
    returned unchanged. Anything else, including the empty string, is
    wrapped in double quotes with embedded double quotes backslash-escaped.
    """
    if re.match(r'^[-_\w./]+$', s):
        return s
    return '"%s"' % s.replace('"', '\\"')
2641 from shlex
import quote
as compat_shlex_quote
2642 except ImportError: # Python < 3.3
2643 def compat_shlex_quote(s
):
2644 if re
.match(r
'^[-_\w./]+$', s
):
2647 return "'" + s
.replace("'", "'\"'\"'") + "'"
2651 args
= shlex
.split('äøę')
2652 assert (isinstance(args
, list)
2653 and isinstance(args
[0], compat_str
)
2654 and args
[0] == 'äøę')
2655 compat_shlex_split
= shlex
.split
2656 except (AssertionError, UnicodeEncodeError):
2657 # Working around shlex issue with unicode strings on some python 2
2658 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split() and return the tokens as text.

    A compat_str input is encoded to UTF-8 before splitting, and every
    resulting token is decoded from UTF-8 again. This is the fallback for
    interpreters where shlex.split() mishandles unicode input.
    """
    raw = s.encode('utf-8') if isinstance(s, compat_str) else s
    return [token.decode('utf-8') for token in shlex.split(raw, comments, posix)]
2672 if sys
.version_info
>= (3, 0):
2673 compat_getenv
= os
.getenv
2674 compat_expanduser
= os
.path
.expanduser
2676 def compat_setenv(key
, value
, env
=os
.environ
):
2679 # Environment variables should be decoded with filesystem encoding.
2680 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2682 def compat_getenv(key
, default
=None):
2683 from .utils
import get_filesystem_encoding
2684 env
= os
.getenv(key
, default
)
2686 env
= env
.decode(get_filesystem_encoding())
2689 def compat_setenv(key
, value
, env
=os
.environ
):
2691 from .utils
import get_filesystem_encoding
2692 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2693 env
[encode(key
)] = encode(value
)
2695 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2696 # environment variables with filesystem encoding. We will work around this by
2697 # providing adjusted implementations.
2698 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2699 # for different platforms with correct environment variables decoding.
2701 if compat_os_name
== 'posix':
2702 def compat_expanduser(path
):
2703 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2705 if not path
.startswith('~'):
2707 i
= path
.find('/', 1)
2711 if 'HOME' not in os
.environ
:
2713 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2715 userhome
= compat_getenv('HOME')
2719 pwent
= pwd
.getpwnam(path
[1:i
])
2722 userhome
= pwent
.pw_dir
2723 userhome
= userhome
.rstrip('/')
2724 return (userhome
+ path
[i
:]) or '/'
2725 elif compat_os_name
in ('nt', 'ce'):
2726 def compat_expanduser(path
):
2727 """Expand ~ and ~user constructs.
2729 If user or $HOME is unknown, do nothing."""
2733 while i
< n
and path
[i
] not in '/\\':
2736 if 'HOME' in os
.environ
:
2737 userhome
= compat_getenv('HOME')
2738 elif 'USERPROFILE' in os
.environ
:
2739 userhome
= compat_getenv('USERPROFILE')
2740 elif 'HOMEPATH' not in os
.environ
:
2744 drive
= compat_getenv('HOMEDRIVE')
2747 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2750 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2752 return userhome
+ path
[i
:]
2754 compat_expanduser
= os
.path
.expanduser
2757 if sys
.version_info
< (3, 0):
2758 def compat_print(s
):
2759 from .utils
import preferredencoding
2760 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
2762 def compat_print(s
):
2763 assert isinstance(s
, compat_str
)
2767 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass(), first encoding a text *prompt* to bytes.

    A compat_str prompt is encoded with the encoding reported by
    preferredencoding(); any other prompt is passed through untouched.
    Remaining arguments are forwarded unchanged.
    """
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    # Imported lazily, and only on the path that needs it.
    from .utils import preferredencoding
    return getpass.getpass(prompt.encode(preferredencoding()), *args, **kwargs)
2774 compat_getpass
= getpass
.getpass
2777 compat_input
= raw_input
2778 except NameError: # Python 3
2779 compat_input
= input
2781 # Python < 2.6.5 require kwargs to be bytes
2785 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a new dict with every key of *kwargs* passed through bytes().

    Values are kept as they are; the input mapping is not modified.
    """
    converted = {}
    for key, value in kwargs.items():
        converted[bytes(key)] = value
    return converted
2790 compat_kwargs
= lambda kwargs
: kwargs
2794 compat_numeric_types
= (int, float, long, complex)
2795 except NameError: # Python 3
2796 compat_numeric_types
= (int, float, complex)
2800 compat_integer_types
= (int, long)
2801 except NameError: # Python 3
2802 compat_integer_types
= (int, )
2805 if sys
.version_info
< (2, 7):
2806 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2807 host
, port
= address
2809 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2810 af
, socktype
, proto
, canonname
, sa
= res
2813 sock
= socket
.socket(af
, socktype
, proto
)
2814 sock
.settimeout(timeout
)
2816 sock
.bind(source_address
)
2819 except socket
.error
as _
:
2821 if sock
is not None:
2826 raise socket
.error('getaddrinfo returns an empty list')
2828 compat_socket_create_connection
= socket
.create_connection
2831 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
2832 # See http://bugs.python.org/issue9161 for what is broken
2833 def workaround_optparse_bug9161():
2834 op
= optparse
.OptionParser()
2835 og
= optparse
.OptionGroup(op
, 'foo')
2839 real_add_option
= optparse
.OptionGroup
.add_option
2841 def _compat_add_option(self
, *args
, **kwargs
):
2843 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2845 bargs
= [enc(a
) for a
in args
]
2847 (k
, enc(v
)) for k
, v
in kwargs
.items())
2848 return real_add_option(self
, *bargs
, **bkwargs
)
2849 optparse
.OptionGroup
.add_option
= _compat_add_option
2852 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2853 compat_get_terminal_size
= shutil
.get_terminal_size
2855 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2857 def compat_get_terminal_size(fallback
=(80, 24)):
2858 columns
= compat_getenv('COLUMNS')
2860 columns
= int(columns
)
2863 lines
= compat_getenv('LINES')
2869 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2871 sp
= subprocess
.Popen(
2873 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2874 out
, err
= sp
.communicate()
2875 _lines
, _columns
= map(int, out
.split())
2877 _columns
, _lines
= _terminal_size(*fallback
)
2879 if columns
is None or columns
<= 0:
2881 if lines
is None or lines
<= 0:
2883 return _terminal_size(columns
, lines
)
2886 itertools
.count(start
=0, step
=1)
2887 compat_itertools_count
= itertools
.count
2888 except TypeError: # Python 2.6
2889 def compat_itertools_count(start
=0, step
=1):
2895 if sys
.version_info
>= (3, 0):
2896 from tokenize
import tokenize
as compat_tokenize_tokenize
2898 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2902 struct
.pack('!I', 0)
2904 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2905 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """Call struct.pack() after encoding a text format string to ASCII bytes.

    *spec* is the struct format; a compat_str value is encoded, anything
    else is used as given. *args* are the values to pack.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def compat_struct_unpack(spec, *args):
    """Call struct.unpack() after encoding a text format string to ASCII bytes.

    *spec* is the struct format; a compat_str value is encoded, anything
    else is used as given. *args* are forwarded to struct.unpack().
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(fmt, *args)
class compat_Struct(struct.Struct):
    """struct.Struct subclass that also accepts a text (compat_str) format."""

    def __init__(self, fmt):
        # A compat_str format is encoded to ASCII bytes before it reaches
        # the base class; any other value is handed over unchanged.
        bytes_fmt = fmt.encode('ascii') if isinstance(fmt, compat_str) else fmt
        super(compat_Struct, self).__init__(bytes_fmt)
2922 compat_struct_pack
= struct
.pack
2923 compat_struct_unpack
= struct
.unpack
2924 if platform
.python_implementation() == 'IronPython' and sys
.version_info
< (2, 7, 8):
2925 class compat_Struct(struct
.Struct
):
2926 def unpack(self
, string
):
2927 if not isinstance(string
, buffer): # noqa: F821
2928 string
= buffer(string
) # noqa: F821
2929 return super(compat_Struct
, self
).unpack(string
)
2931 compat_Struct
= struct
.Struct
2935 from future_builtins
import zip as compat_zip
2936 except ImportError: # not 2.6+ or is 3.x
2938 from itertools
import izip
as compat_zip
# < 2.5 or 3.x
2943 if sys
.version_info
< (3, 3):
def compat_b64decode(s, *args, **kwargs):
    """Call base64.b64decode() after encoding a text input to ASCII bytes.

    A compat_str *s* is encoded first; any other input is passed as is.
    Extra positional and keyword arguments are forwarded unchanged.
    """
    payload = s.encode('ascii') if isinstance(s, compat_str) else s
    return base64.b64decode(payload, *args, **kwargs)
2949 compat_b64decode
= base64
.b64decode
2952 if platform
.python_implementation() == 'PyPy' and sys
.pypy_version_info
< (5, 4, 0):
2953 # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
2954 # names, see the original PyPy issue [1] and the youtube-dl one [2].
2955 # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
2956 # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
2957 def compat_ctypes_WINFUNCTYPE(*args
, **kwargs
):
2958 real
= ctypes
.WINFUNCTYPE(*args
, **kwargs
)
2960 def resf(tpl
, *args
, **kwargs
):
2962 return real((str(funcname
), dll
), *args
, **kwargs
)
2966 def compat_ctypes_WINFUNCTYPE(*args
, **kwargs
):
2967 return ctypes
.WINFUNCTYPE(*args
, **kwargs
)
2971 'compat_HTMLParseError',
2972 'compat_HTMLParser',
2976 'compat_basestring',
2980 'compat_ctypes_WINFUNCTYPE',
2981 'compat_etree_Element',
2982 'compat_etree_fromstring',
2983 'compat_etree_register_namespace',
2984 'compat_expanduser',
2985 'compat_get_terminal_size',
2988 'compat_html_entities',
2989 'compat_html_entities_html5',
2990 'compat_http_client',
2991 'compat_http_server',
2993 'compat_integer_types',
2994 'compat_itertools_count',
2996 'compat_numeric_types',
3002 'compat_shlex_quote',
3003 'compat_shlex_split',
3004 'compat_socket_create_connection',
3006 'compat_struct_pack',
3007 'compat_struct_unpack',
3008 'compat_subprocess_get_DEVNULL',
3009 'compat_tokenize_tokenize',
3010 'compat_urllib_error',
3011 'compat_urllib_parse',
3012 'compat_urllib_parse_unquote',
3013 'compat_urllib_parse_unquote_plus',
3014 'compat_urllib_parse_unquote_to_bytes',
3015 'compat_urllib_parse_urlencode',
3016 'compat_urllib_parse_urlparse',
3017 'compat_urllib_request',
3018 'compat_urllib_request_DataHandler',
3019 'compat_urllib_response',
3021 'compat_urlretrieve',
3022 'compat_xml_parse_error',
3025 'workaround_optparse_bug9161',