2 from __future__
import unicode_literals
22 import xml
.etree
.ElementTree
26 import urllib
.request
as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2
as compat_urllib_request
31 import urllib
.error
as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2
as compat_urllib_error
36 import urllib
.parse
as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib
as compat_urllib_parse
41 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse
import urlparse
as compat_urllib_parse_urlparse
46 import urllib
.parse
as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse
as compat_urlparse
51 import urllib
.response
as compat_urllib_response
52 except ImportError: # Python 2
53 import urllib
as compat_urllib_response
56 import http
.cookiejar
as compat_cookiejar
57 except ImportError: # Python 2
58 import cookielib
as compat_cookiejar
61 import http
.cookies
as compat_cookies
62 except ImportError: # Python 2
63 import Cookie
as compat_cookies
66 import html
.entities
as compat_html_entities
67 except ImportError: # Python 2
68 import htmlentitydefs
as compat_html_entities
71 compat_html_entities_html5
= compat_html_entities
.html5
72 except AttributeError:
73 # Copied from CPython 3.5.1 html/entities.py
74 compat_html_entities_html5
= {
83 'acE;': '\u223e\u0333',
103 'alefsym;': '\u2135',
118 'andslope;': '\u2a58',
124 'angmsdaa;': '\u29a8',
125 'angmsdab;': '\u29a9',
126 'angmsdac;': '\u29aa',
127 'angmsdad;': '\u29ab',
128 'angmsdae;': '\u29ac',
129 'angmsdaf;': '\u29ad',
130 'angmsdag;': '\u29ae',
131 'angmsdah;': '\u29af',
133 'angrtvb;': '\u22be',
134 'angrtvbd;': '\u299d',
137 'angzarr;': '\u237c',
140 'Aopf;': '\U0001d538',
141 'aopf;': '\U0001d552',
148 'ApplyFunction;': '\u2061',
150 'approxeq;': '\u224a',
155 'Ascr;': '\U0001d49c',
156 'ascr;': '\U0001d4b6',
160 'asympeq;': '\u224d',
169 'awconint;': '\u2233',
171 'backcong;': '\u224c',
172 'backepsilon;': '\u03f6',
173 'backprime;': '\u2035',
174 'backsim;': '\u223d',
175 'backsimeq;': '\u22cd',
176 'Backslash;': '\u2216',
181 'barwedge;': '\u2305',
183 'bbrktbrk;': '\u23b6',
189 'Because;': '\u2235',
190 'because;': '\u2235',
191 'bemptyv;': '\u29b0',
194 'Bernoullis;': '\u212c',
198 'between;': '\u226c',
199 'Bfr;': '\U0001d505',
200 'bfr;': '\U0001d51f',
202 'bigcirc;': '\u25ef',
204 'bigodot;': '\u2a00',
205 'bigoplus;': '\u2a01',
206 'bigotimes;': '\u2a02',
207 'bigsqcup;': '\u2a06',
208 'bigstar;': '\u2605',
209 'bigtriangledown;': '\u25bd',
210 'bigtriangleup;': '\u25b3',
211 'biguplus;': '\u2a04',
213 'bigwedge;': '\u22c0',
215 'blacklozenge;': '\u29eb',
216 'blacksquare;': '\u25aa',
217 'blacktriangle;': '\u25b4',
218 'blacktriangledown;': '\u25be',
219 'blacktriangleleft;': '\u25c2',
220 'blacktriangleright;': '\u25b8',
227 'bnequiv;': '\u2261\u20e5',
230 'Bopf;': '\U0001d539',
231 'bopf;': '\U0001d553',
254 'boxminus;': '\u229f',
255 'boxplus;': '\u229e',
256 'boxtimes;': '\u22a0',
285 'bscr;': '\U0001d4b7',
291 'bsolhsub;': '\u27c8',
304 'capbrcup;': '\u2a49',
308 'CapitalDifferentialD;': '\u2145',
309 'caps;': '\u2229\ufe00',
312 'Cayleys;': '\u212d',
322 'Cconint;': '\u2230',
324 'ccupssm;': '\u2a50',
330 'cemptyv;': '\u29b2',
333 'CenterDot;': '\xb7',
334 'centerdot;': '\xb7',
336 'cfr;': '\U0001d520',
340 'checkmark;': '\u2713',
346 'circlearrowleft;': '\u21ba',
347 'circlearrowright;': '\u21bb',
348 'circledast;': '\u229b',
349 'circledcirc;': '\u229a',
350 'circleddash;': '\u229d',
351 'CircleDot;': '\u2299',
353 'circledS;': '\u24c8',
354 'CircleMinus;': '\u2296',
355 'CirclePlus;': '\u2295',
356 'CircleTimes;': '\u2297',
359 'cirfnint;': '\u2a10',
361 'cirscir;': '\u29c2',
362 'ClockwiseContourIntegral;': '\u2232',
363 'CloseCurlyDoubleQuote;': '\u201d',
364 'CloseCurlyQuote;': '\u2019',
366 'clubsuit;': '\u2663',
371 'coloneq;': '\u2254',
376 'complement;': '\u2201',
377 'complexes;': '\u2102',
379 'congdot;': '\u2a6d',
380 'Congruent;': '\u2261',
383 'ContourIntegral;': '\u222e',
385 'copf;': '\U0001d554',
387 'Coproduct;': '\u2210',
393 'CounterClockwiseContourIntegral;': '\u2233',
397 'Cscr;': '\U0001d49e',
398 'cscr;': '\U0001d4b8',
404 'cudarrl;': '\u2938',
405 'cudarrr;': '\u2935',
409 'cularrp;': '\u293d',
412 'cupbrcap;': '\u2a48',
418 'cups;': '\u222a\ufe00',
420 'curarrm;': '\u293c',
421 'curlyeqprec;': '\u22de',
422 'curlyeqsucc;': '\u22df',
423 'curlyvee;': '\u22ce',
424 'curlywedge;': '\u22cf',
427 'curvearrowleft;': '\u21b6',
428 'curvearrowright;': '\u21b7',
431 'cwconint;': '\u2232',
443 'dbkarow;': '\u290f',
451 'ddagger;': '\u2021',
453 'DDotrahd;': '\u2911',
454 'ddotseq;': '\u2a77',
460 'demptyv;': '\u29b1',
462 'Dfr;': '\U0001d507',
463 'dfr;': '\U0001d521',
467 'DiacriticalAcute;': '\xb4',
468 'DiacriticalDot;': '\u02d9',
469 'DiacriticalDoubleAcute;': '\u02dd',
470 'DiacriticalGrave;': '`',
471 'DiacriticalTilde;': '\u02dc',
473 'Diamond;': '\u22c4',
474 'diamond;': '\u22c4',
475 'diamondsuit;': '\u2666',
478 'DifferentialD;': '\u2146',
479 'digamma;': '\u03dd',
484 'divideontimes;': '\u22c7',
491 'Dopf;': '\U0001d53b',
492 'dopf;': '\U0001d555',
497 'doteqdot;': '\u2251',
498 'DotEqual;': '\u2250',
499 'dotminus;': '\u2238',
500 'dotplus;': '\u2214',
501 'dotsquare;': '\u22a1',
502 'doublebarwedge;': '\u2306',
503 'DoubleContourIntegral;': '\u222f',
504 'DoubleDot;': '\xa8',
505 'DoubleDownArrow;': '\u21d3',
506 'DoubleLeftArrow;': '\u21d0',
507 'DoubleLeftRightArrow;': '\u21d4',
508 'DoubleLeftTee;': '\u2ae4',
509 'DoubleLongLeftArrow;': '\u27f8',
510 'DoubleLongLeftRightArrow;': '\u27fa',
511 'DoubleLongRightArrow;': '\u27f9',
512 'DoubleRightArrow;': '\u21d2',
513 'DoubleRightTee;': '\u22a8',
514 'DoubleUpArrow;': '\u21d1',
515 'DoubleUpDownArrow;': '\u21d5',
516 'DoubleVerticalBar;': '\u2225',
517 'DownArrow;': '\u2193',
518 'Downarrow;': '\u21d3',
519 'downarrow;': '\u2193',
520 'DownArrowBar;': '\u2913',
521 'DownArrowUpArrow;': '\u21f5',
522 'DownBreve;': '\u0311',
523 'downdownarrows;': '\u21ca',
524 'downharpoonleft;': '\u21c3',
525 'downharpoonright;': '\u21c2',
526 'DownLeftRightVector;': '\u2950',
527 'DownLeftTeeVector;': '\u295e',
528 'DownLeftVector;': '\u21bd',
529 'DownLeftVectorBar;': '\u2956',
530 'DownRightTeeVector;': '\u295f',
531 'DownRightVector;': '\u21c1',
532 'DownRightVectorBar;': '\u2957',
533 'DownTee;': '\u22a4',
534 'DownTeeArrow;': '\u21a7',
535 'drbkarow;': '\u2910',
538 'Dscr;': '\U0001d49f',
539 'dscr;': '\U0001d4b9',
550 'dwangle;': '\u29a6',
553 'dzigrarr;': '\u27ff',
575 'Efr;': '\U0001d508',
576 'efr;': '\U0001d522',
585 'Element;': '\u2208',
586 'elinters;': '\u23e7',
593 'emptyset;': '\u2205',
594 'EmptySmallSquare;': '\u25fb',
596 'EmptyVerySmallSquare;': '\u25ab',
605 'Eopf;': '\U0001d53c',
606 'eopf;': '\U0001d556',
611 'Epsilon;': '\u0395',
612 'epsilon;': '\u03b5',
615 'eqcolon;': '\u2255',
617 'eqslantgtr;': '\u2a96',
618 'eqslantless;': '\u2a95',
621 'EqualTilde;': '\u2242',
623 'Equilibrium;': '\u21cc',
625 'equivDD;': '\u2a78',
626 'eqvparsl;': '\u29e5',
648 'expectation;': '\u2130',
649 'ExponentialE;': '\u2147',
650 'exponentiale;': '\u2147',
651 'fallingdotseq;': '\u2252',
658 'Ffr;': '\U0001d509',
659 'ffr;': '\U0001d523',
661 'FilledSmallSquare;': '\u25fc',
662 'FilledVerySmallSquare;': '\u25aa',
668 'Fopf;': '\U0001d53d',
669 'fopf;': '\U0001d557',
674 'Fouriertrf;': '\u2131',
675 'fpartint;': '\u2a0d',
697 'fscr;': '\U0001d4bb',
719 'geqslant;': '\u2a7e',
723 'gesdoto;': '\u2a82',
724 'gesdotol;': '\u2a84',
725 'gesl;': '\u22db\ufe00',
727 'Gfr;': '\U0001d50a',
728 'gfr;': '\U0001d524',
740 'gnapprox;': '\u2a8a',
746 'Gopf;': '\U0001d53e',
747 'gopf;': '\U0001d558',
749 'GreaterEqual;': '\u2265',
750 'GreaterEqualLess;': '\u22db',
751 'GreaterFullEqual;': '\u2267',
752 'GreaterGreater;': '\u2aa2',
753 'GreaterLess;': '\u2277',
754 'GreaterSlantEqual;': '\u2a7e',
755 'GreaterTilde;': '\u2273',
756 'Gscr;': '\U0001d4a2',
770 'gtquest;': '\u2a7c',
771 'gtrapprox;': '\u2a86',
774 'gtreqless;': '\u22db',
775 'gtreqqless;': '\u2a8c',
776 'gtrless;': '\u2277',
778 'gvertneqq;': '\u2269\ufe00',
779 'gvnE;': '\u2269\ufe00',
788 'harrcir;': '\u2948',
795 'heartsuit;': '\u2665',
799 'hfr;': '\U0001d525',
800 'HilbertSpace;': '\u210b',
801 'hksearow;': '\u2925',
802 'hkswarow;': '\u2926',
805 'hookleftarrow;': '\u21a9',
806 'hookrightarrow;': '\u21aa',
808 'hopf;': '\U0001d559',
810 'HorizontalLine;': '\u2500',
812 'hscr;': '\U0001d4bd',
816 'HumpDownHump;': '\u224e',
817 'HumpEqual;': '\u224f',
838 'ifr;': '\U0001d526',
854 'ImaginaryI;': '\u2148',
855 'imagline;': '\u2110',
856 'imagpart;': '\u2111',
860 'Implies;': '\u21d2',
864 'infintie;': '\u29dd',
869 'integers;': '\u2124',
870 'Integral;': '\u222b',
871 'intercal;': '\u22ba',
872 'Intersection;': '\u22c2',
873 'intlarhk;': '\u2a17',
874 'intprod;': '\u2a3c',
875 'InvisibleComma;': '\u2063',
876 'InvisibleTimes;': '\u2062',
881 'Iopf;': '\U0001d540',
882 'iopf;': '\U0001d55a',
889 'iscr;': '\U0001d4be',
891 'isindot;': '\u22f5',
909 'Jfr;': '\U0001d50d',
910 'jfr;': '\U0001d527',
912 'Jopf;': '\U0001d541',
913 'jopf;': '\U0001d55b',
914 'Jscr;': '\U0001d4a5',
915 'jscr;': '\U0001d4bf',
927 'Kfr;': '\U0001d50e',
928 'kfr;': '\U0001d528',
934 'Kopf;': '\U0001d542',
935 'kopf;': '\U0001d55c',
936 'Kscr;': '\U0001d4a6',
937 'kscr;': '\U0001d4c0',
941 'laemptyv;': '\u29b4',
950 'Laplacetrf;': '\u2112',
957 'larrbfs;': '\u291f',
962 'larrsim;': '\u2973',
968 'lates;': '\u2aad\ufe00',
975 'lbrksld;': '\u298f',
976 'lbrkslu;': '\u298d',
988 'ldrdhar;': '\u2967',
989 'ldrushar;': '\u294b',
993 'LeftAngleBracket;': '\u27e8',
994 'LeftArrow;': '\u2190',
995 'Leftarrow;': '\u21d0',
996 'leftarrow;': '\u2190',
997 'LeftArrowBar;': '\u21e4',
998 'LeftArrowRightArrow;': '\u21c6',
999 'leftarrowtail;': '\u21a2',
1000 'LeftCeiling;': '\u2308',
1001 'LeftDoubleBracket;': '\u27e6',
1002 'LeftDownTeeVector;': '\u2961',
1003 'LeftDownVector;': '\u21c3',
1004 'LeftDownVectorBar;': '\u2959',
1005 'LeftFloor;': '\u230a',
1006 'leftharpoondown;': '\u21bd',
1007 'leftharpoonup;': '\u21bc',
1008 'leftleftarrows;': '\u21c7',
1009 'LeftRightArrow;': '\u2194',
1010 'Leftrightarrow;': '\u21d4',
1011 'leftrightarrow;': '\u2194',
1012 'leftrightarrows;': '\u21c6',
1013 'leftrightharpoons;': '\u21cb',
1014 'leftrightsquigarrow;': '\u21ad',
1015 'LeftRightVector;': '\u294e',
1016 'LeftTee;': '\u22a3',
1017 'LeftTeeArrow;': '\u21a4',
1018 'LeftTeeVector;': '\u295a',
1019 'leftthreetimes;': '\u22cb',
1020 'LeftTriangle;': '\u22b2',
1021 'LeftTriangleBar;': '\u29cf',
1022 'LeftTriangleEqual;': '\u22b4',
1023 'LeftUpDownVector;': '\u2951',
1024 'LeftUpTeeVector;': '\u2960',
1025 'LeftUpVector;': '\u21bf',
1026 'LeftUpVectorBar;': '\u2958',
1027 'LeftVector;': '\u21bc',
1028 'LeftVectorBar;': '\u2952',
1033 'leqslant;': '\u2a7d',
1036 'lesdot;': '\u2a7f',
1037 'lesdoto;': '\u2a81',
1038 'lesdotor;': '\u2a83',
1039 'lesg;': '\u22da\ufe00',
1040 'lesges;': '\u2a93',
1041 'lessapprox;': '\u2a85',
1042 'lessdot;': '\u22d6',
1043 'lesseqgtr;': '\u22da',
1044 'lesseqqgtr;': '\u2a8b',
1045 'LessEqualGreater;': '\u22da',
1046 'LessFullEqual;': '\u2266',
1047 'LessGreater;': '\u2276',
1048 'lessgtr;': '\u2276',
1049 'LessLess;': '\u2aa1',
1050 'lesssim;': '\u2272',
1051 'LessSlantEqual;': '\u2a7d',
1052 'LessTilde;': '\u2272',
1053 'lfisht;': '\u297c',
1054 'lfloor;': '\u230a',
1055 'Lfr;': '\U0001d50f',
1056 'lfr;': '\U0001d529',
1062 'lharul;': '\u296a',
1069 'llcorner;': '\u231e',
1070 'Lleftarrow;': '\u21da',
1071 'llhard;': '\u296b',
1073 'Lmidot;': '\u013f',
1074 'lmidot;': '\u0140',
1075 'lmoust;': '\u23b0',
1076 'lmoustache;': '\u23b0',
1078 'lnapprox;': '\u2a89',
1087 'LongLeftArrow;': '\u27f5',
1088 'Longleftarrow;': '\u27f8',
1089 'longleftarrow;': '\u27f5',
1090 'LongLeftRightArrow;': '\u27f7',
1091 'Longleftrightarrow;': '\u27fa',
1092 'longleftrightarrow;': '\u27f7',
1093 'longmapsto;': '\u27fc',
1094 'LongRightArrow;': '\u27f6',
1095 'Longrightarrow;': '\u27f9',
1096 'longrightarrow;': '\u27f6',
1097 'looparrowleft;': '\u21ab',
1098 'looparrowright;': '\u21ac',
1100 'Lopf;': '\U0001d543',
1101 'lopf;': '\U0001d55d',
1102 'loplus;': '\u2a2d',
1103 'lotimes;': '\u2a34',
1104 'lowast;': '\u2217',
1106 'LowerLeftArrow;': '\u2199',
1107 'LowerRightArrow;': '\u2198',
1109 'lozenge;': '\u25ca',
1112 'lparlt;': '\u2993',
1114 'lrcorner;': '\u231f',
1116 'lrhard;': '\u296d',
1119 'lsaquo;': '\u2039',
1121 'lscr;': '\U0001d4c1',
1129 'lsquor;': '\u201a',
1130 'Lstrok;': '\u0141',
1131 'lstrok;': '\u0142',
1140 'lthree;': '\u22cb',
1141 'ltimes;': '\u22c9',
1142 'ltlarr;': '\u2976',
1143 'ltquest;': '\u2a7b',
1147 'ltrPar;': '\u2996',
1148 'lurdshar;': '\u294a',
1149 'luruhar;': '\u2966',
1150 'lvertneqq;': '\u2268\ufe00',
1151 'lvnE;': '\u2268\ufe00',
1156 'maltese;': '\u2720',
1159 'mapsto;': '\u21a6',
1160 'mapstodown;': '\u21a7',
1161 'mapstoleft;': '\u21a4',
1162 'mapstoup;': '\u21a5',
1163 'marker;': '\u25ae',
1164 'mcomma;': '\u2a29',
1169 'measuredangle;': '\u2221',
1170 'MediumSpace;': '\u205f',
1171 'Mellintrf;': '\u2133',
1172 'Mfr;': '\U0001d510',
1173 'mfr;': '\U0001d52a',
1179 'midcir;': '\u2af0',
1183 'minusb;': '\u229f',
1184 'minusd;': '\u2238',
1185 'minusdu;': '\u2a2a',
1186 'MinusPlus;': '\u2213',
1189 'mnplus;': '\u2213',
1190 'models;': '\u22a7',
1191 'Mopf;': '\U0001d544',
1192 'mopf;': '\U0001d55e',
1195 'mscr;': '\U0001d4c2',
1196 'mstpos;': '\u223e',
1199 'multimap;': '\u22b8',
1202 'Nacute;': '\u0143',
1203 'nacute;': '\u0144',
1204 'nang;': '\u2220\u20d2',
1206 'napE;': '\u2a70\u0338',
1207 'napid;': '\u224b\u0338',
1209 'napprox;': '\u2249',
1211 'natural;': '\u266e',
1212 'naturals;': '\u2115',
1215 'nbump;': '\u224e\u0338',
1216 'nbumpe;': '\u224f\u0338',
1218 'Ncaron;': '\u0147',
1219 'ncaron;': '\u0148',
1220 'Ncedil;': '\u0145',
1221 'ncedil;': '\u0146',
1223 'ncongdot;': '\u2a6d\u0338',
1229 'nearhk;': '\u2924',
1232 'nearrow;': '\u2197',
1233 'nedot;': '\u2250\u0338',
1234 'NegativeMediumSpace;': '\u200b',
1235 'NegativeThickSpace;': '\u200b',
1236 'NegativeThinSpace;': '\u200b',
1237 'NegativeVeryThinSpace;': '\u200b',
1238 'nequiv;': '\u2262',
1239 'nesear;': '\u2928',
1240 'nesim;': '\u2242\u0338',
1241 'NestedGreaterGreater;': '\u226b',
1242 'NestedLessLess;': '\u226a',
1244 'nexist;': '\u2204',
1245 'nexists;': '\u2204',
1246 'Nfr;': '\U0001d511',
1247 'nfr;': '\U0001d52b',
1248 'ngE;': '\u2267\u0338',
1251 'ngeqq;': '\u2267\u0338',
1252 'ngeqslant;': '\u2a7e\u0338',
1253 'nges;': '\u2a7e\u0338',
1254 'nGg;': '\u22d9\u0338',
1256 'nGt;': '\u226b\u20d2',
1259 'nGtv;': '\u226b\u0338',
1272 'nlE;': '\u2266\u0338',
1274 'nLeftarrow;': '\u21cd',
1275 'nleftarrow;': '\u219a',
1276 'nLeftrightarrow;': '\u21ce',
1277 'nleftrightarrow;': '\u21ae',
1279 'nleqq;': '\u2266\u0338',
1280 'nleqslant;': '\u2a7d\u0338',
1281 'nles;': '\u2a7d\u0338',
1283 'nLl;': '\u22d8\u0338',
1285 'nLt;': '\u226a\u20d2',
1288 'nltrie;': '\u22ec',
1289 'nLtv;': '\u226a\u0338',
1291 'NoBreak;': '\u2060',
1292 'NonBreakingSpace;': '\xa0',
1294 'nopf;': '\U0001d55f',
1298 'NotCongruent;': '\u2262',
1299 'NotCupCap;': '\u226d',
1300 'NotDoubleVerticalBar;': '\u2226',
1301 'NotElement;': '\u2209',
1302 'NotEqual;': '\u2260',
1303 'NotEqualTilde;': '\u2242\u0338',
1304 'NotExists;': '\u2204',
1305 'NotGreater;': '\u226f',
1306 'NotGreaterEqual;': '\u2271',
1307 'NotGreaterFullEqual;': '\u2267\u0338',
1308 'NotGreaterGreater;': '\u226b\u0338',
1309 'NotGreaterLess;': '\u2279',
1310 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1311 'NotGreaterTilde;': '\u2275',
1312 'NotHumpDownHump;': '\u224e\u0338',
1313 'NotHumpEqual;': '\u224f\u0338',
1315 'notindot;': '\u22f5\u0338',
1316 'notinE;': '\u22f9\u0338',
1317 'notinva;': '\u2209',
1318 'notinvb;': '\u22f7',
1319 'notinvc;': '\u22f6',
1320 'NotLeftTriangle;': '\u22ea',
1321 'NotLeftTriangleBar;': '\u29cf\u0338',
1322 'NotLeftTriangleEqual;': '\u22ec',
1323 'NotLess;': '\u226e',
1324 'NotLessEqual;': '\u2270',
1325 'NotLessGreater;': '\u2278',
1326 'NotLessLess;': '\u226a\u0338',
1327 'NotLessSlantEqual;': '\u2a7d\u0338',
1328 'NotLessTilde;': '\u2274',
1329 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1330 'NotNestedLessLess;': '\u2aa1\u0338',
1332 'notniva;': '\u220c',
1333 'notnivb;': '\u22fe',
1334 'notnivc;': '\u22fd',
1335 'NotPrecedes;': '\u2280',
1336 'NotPrecedesEqual;': '\u2aaf\u0338',
1337 'NotPrecedesSlantEqual;': '\u22e0',
1338 'NotReverseElement;': '\u220c',
1339 'NotRightTriangle;': '\u22eb',
1340 'NotRightTriangleBar;': '\u29d0\u0338',
1341 'NotRightTriangleEqual;': '\u22ed',
1342 'NotSquareSubset;': '\u228f\u0338',
1343 'NotSquareSubsetEqual;': '\u22e2',
1344 'NotSquareSuperset;': '\u2290\u0338',
1345 'NotSquareSupersetEqual;': '\u22e3',
1346 'NotSubset;': '\u2282\u20d2',
1347 'NotSubsetEqual;': '\u2288',
1348 'NotSucceeds;': '\u2281',
1349 'NotSucceedsEqual;': '\u2ab0\u0338',
1350 'NotSucceedsSlantEqual;': '\u22e1',
1351 'NotSucceedsTilde;': '\u227f\u0338',
1352 'NotSuperset;': '\u2283\u20d2',
1353 'NotSupersetEqual;': '\u2289',
1354 'NotTilde;': '\u2241',
1355 'NotTildeEqual;': '\u2244',
1356 'NotTildeFullEqual;': '\u2247',
1357 'NotTildeTilde;': '\u2249',
1358 'NotVerticalBar;': '\u2224',
1360 'nparallel;': '\u2226',
1361 'nparsl;': '\u2afd\u20e5',
1362 'npart;': '\u2202\u0338',
1363 'npolint;': '\u2a14',
1365 'nprcue;': '\u22e0',
1366 'npre;': '\u2aaf\u0338',
1368 'npreceq;': '\u2aaf\u0338',
1371 'nrarrc;': '\u2933\u0338',
1372 'nrarrw;': '\u219d\u0338',
1373 'nRightarrow;': '\u21cf',
1374 'nrightarrow;': '\u219b',
1376 'nrtrie;': '\u22ed',
1378 'nsccue;': '\u22e1',
1379 'nsce;': '\u2ab0\u0338',
1380 'Nscr;': '\U0001d4a9',
1381 'nscr;': '\U0001d4c3',
1382 'nshortmid;': '\u2224',
1383 'nshortparallel;': '\u2226',
1386 'nsimeq;': '\u2244',
1389 'nsqsube;': '\u22e2',
1390 'nsqsupe;': '\u22e3',
1392 'nsubE;': '\u2ac5\u0338',
1394 'nsubset;': '\u2282\u20d2',
1395 'nsubseteq;': '\u2288',
1396 'nsubseteqq;': '\u2ac5\u0338',
1398 'nsucceq;': '\u2ab0\u0338',
1400 'nsupE;': '\u2ac6\u0338',
1402 'nsupset;': '\u2283\u20d2',
1403 'nsupseteq;': '\u2289',
1404 'nsupseteqq;': '\u2ac6\u0338',
1411 'ntriangleleft;': '\u22ea',
1412 'ntrianglelefteq;': '\u22ec',
1413 'ntriangleright;': '\u22eb',
1414 'ntrianglerighteq;': '\u22ed',
1418 'numero;': '\u2116',
1420 'nvap;': '\u224d\u20d2',
1421 'nVDash;': '\u22af',
1422 'nVdash;': '\u22ae',
1423 'nvDash;': '\u22ad',
1424 'nvdash;': '\u22ac',
1425 'nvge;': '\u2265\u20d2',
1427 'nvHarr;': '\u2904',
1428 'nvinfin;': '\u29de',
1429 'nvlArr;': '\u2902',
1430 'nvle;': '\u2264\u20d2',
1432 'nvltrie;': '\u22b4\u20d2',
1433 'nvrArr;': '\u2903',
1434 'nvrtrie;': '\u22b5\u20d2',
1435 'nvsim;': '\u223c\u20d2',
1436 'nwarhk;': '\u2923',
1439 'nwarrow;': '\u2196',
1440 'nwnear;': '\u2927',
1454 'Odblac;': '\u0150',
1455 'odblac;': '\u0151',
1458 'odsold;': '\u29bc',
1462 'Ofr;': '\U0001d512',
1463 'ofr;': '\U0001d52c',
1475 'olcross;': '\u29bb',
1482 'Omicron;': '\u039f',
1483 'omicron;': '\u03bf',
1485 'ominus;': '\u2296',
1486 'Oopf;': '\U0001d546',
1487 'oopf;': '\U0001d560',
1489 'OpenCurlyDoubleQuote;': '\u201c',
1490 'OpenCurlyQuote;': '\u2018',
1498 'orderof;': '\u2134',
1503 'origof;': '\u22b6',
1505 'orslope;': '\u2a57',
1508 'Oscr;': '\U0001d4aa',
1519 'Otimes;': '\u2a37',
1520 'otimes;': '\u2297',
1521 'otimesas;': '\u2a36',
1527 'OverBar;': '\u203e',
1528 'OverBrace;': '\u23de',
1529 'OverBracket;': '\u23b4',
1530 'OverParenthesis;': '\u23dc',
1534 'parallel;': '\u2225',
1535 'parsim;': '\u2af3',
1538 'PartialD;': '\u2202',
1543 'permil;': '\u2030',
1545 'pertenk;': '\u2031',
1546 'Pfr;': '\U0001d513',
1547 'pfr;': '\U0001d52d',
1551 'phmmat;': '\u2133',
1555 'pitchfork;': '\u22d4',
1557 'planck;': '\u210f',
1558 'planckh;': '\u210e',
1559 'plankv;': '\u210f',
1561 'plusacir;': '\u2a23',
1563 'pluscir;': '\u2a22',
1564 'plusdo;': '\u2214',
1565 'plusdu;': '\u2a25',
1567 'PlusMinus;': '\xb1',
1570 'plussim;': '\u2a26',
1571 'plustwo;': '\u2a27',
1573 'Poincareplane;': '\u210c',
1574 'pointint;': '\u2a15',
1576 'popf;': '\U0001d561',
1586 'precapprox;': '\u2ab7',
1587 'preccurlyeq;': '\u227c',
1588 'Precedes;': '\u227a',
1589 'PrecedesEqual;': '\u2aaf',
1590 'PrecedesSlantEqual;': '\u227c',
1591 'PrecedesTilde;': '\u227e',
1592 'preceq;': '\u2aaf',
1593 'precnapprox;': '\u2ab9',
1594 'precneqq;': '\u2ab5',
1595 'precnsim;': '\u22e8',
1596 'precsim;': '\u227e',
1599 'primes;': '\u2119',
1602 'prnsim;': '\u22e8',
1604 'Product;': '\u220f',
1605 'profalar;': '\u232e',
1606 'profline;': '\u2312',
1607 'profsurf;': '\u2313',
1609 'Proportion;': '\u2237',
1610 'Proportional;': '\u221d',
1611 'propto;': '\u221d',
1613 'prurel;': '\u22b0',
1614 'Pscr;': '\U0001d4ab',
1615 'pscr;': '\U0001d4c5',
1618 'puncsp;': '\u2008',
1619 'Qfr;': '\U0001d514',
1620 'qfr;': '\U0001d52e',
1623 'qopf;': '\U0001d562',
1624 'qprime;': '\u2057',
1625 'Qscr;': '\U0001d4ac',
1626 'qscr;': '\U0001d4c6',
1627 'quaternions;': '\u210d',
1628 'quatint;': '\u2a16',
1630 'questeq;': '\u225f',
1636 'race;': '\u223d\u0331',
1637 'Racute;': '\u0154',
1638 'racute;': '\u0155',
1640 'raemptyv;': '\u29b3',
1645 'rangle;': '\u27e9',
1651 'rarrap;': '\u2975',
1653 'rarrbfs;': '\u2920',
1655 'rarrfs;': '\u291e',
1656 'rarrhk;': '\u21aa',
1657 'rarrlp;': '\u21ac',
1658 'rarrpl;': '\u2945',
1659 'rarrsim;': '\u2974',
1660 'Rarrtl;': '\u2916',
1661 'rarrtl;': '\u21a3',
1663 'rAtail;': '\u291c',
1664 'ratail;': '\u291a',
1666 'rationals;': '\u211a',
1674 'rbrksld;': '\u298e',
1675 'rbrkslu;': '\u2990',
1676 'Rcaron;': '\u0158',
1677 'rcaron;': '\u0159',
1678 'Rcedil;': '\u0156',
1679 'rcedil;': '\u0157',
1685 'rdldhar;': '\u2969',
1687 'rdquor;': '\u201d',
1691 'realine;': '\u211b',
1692 'realpart;': '\u211c',
1699 'ReverseElement;': '\u220b',
1700 'ReverseEquilibrium;': '\u21cb',
1701 'ReverseUpEquilibrium;': '\u296f',
1702 'rfisht;': '\u297d',
1703 'rfloor;': '\u230b',
1705 'rfr;': '\U0001d52f',
1709 'rharul;': '\u296c',
1713 'RightAngleBracket;': '\u27e9',
1714 'RightArrow;': '\u2192',
1715 'Rightarrow;': '\u21d2',
1716 'rightarrow;': '\u2192',
1717 'RightArrowBar;': '\u21e5',
1718 'RightArrowLeftArrow;': '\u21c4',
1719 'rightarrowtail;': '\u21a3',
1720 'RightCeiling;': '\u2309',
1721 'RightDoubleBracket;': '\u27e7',
1722 'RightDownTeeVector;': '\u295d',
1723 'RightDownVector;': '\u21c2',
1724 'RightDownVectorBar;': '\u2955',
1725 'RightFloor;': '\u230b',
1726 'rightharpoondown;': '\u21c1',
1727 'rightharpoonup;': '\u21c0',
1728 'rightleftarrows;': '\u21c4',
1729 'rightleftharpoons;': '\u21cc',
1730 'rightrightarrows;': '\u21c9',
1731 'rightsquigarrow;': '\u219d',
1732 'RightTee;': '\u22a2',
1733 'RightTeeArrow;': '\u21a6',
1734 'RightTeeVector;': '\u295b',
1735 'rightthreetimes;': '\u22cc',
1736 'RightTriangle;': '\u22b3',
1737 'RightTriangleBar;': '\u29d0',
1738 'RightTriangleEqual;': '\u22b5',
1739 'RightUpDownVector;': '\u294f',
1740 'RightUpTeeVector;': '\u295c',
1741 'RightUpVector;': '\u21be',
1742 'RightUpVectorBar;': '\u2954',
1743 'RightVector;': '\u21c0',
1744 'RightVectorBar;': '\u2953',
1746 'risingdotseq;': '\u2253',
1750 'rmoust;': '\u23b1',
1751 'rmoustache;': '\u23b1',
1758 'ropf;': '\U0001d563',
1759 'roplus;': '\u2a2e',
1760 'rotimes;': '\u2a35',
1761 'RoundImplies;': '\u2970',
1763 'rpargt;': '\u2994',
1764 'rppolint;': '\u2a12',
1766 'Rrightarrow;': '\u21db',
1767 'rsaquo;': '\u203a',
1769 'rscr;': '\U0001d4c7',
1774 'rsquor;': '\u2019',
1775 'rthree;': '\u22cc',
1776 'rtimes;': '\u22ca',
1780 'rtriltri;': '\u29ce',
1781 'RuleDelayed;': '\u29f4',
1782 'ruluhar;': '\u2968',
1784 'Sacute;': '\u015a',
1785 'sacute;': '\u015b',
1790 'Scaron;': '\u0160',
1791 'scaron;': '\u0161',
1795 'Scedil;': '\u015e',
1796 'scedil;': '\u015f',
1801 'scnsim;': '\u22e9',
1802 'scpolint;': '\u2a13',
1809 'searhk;': '\u2925',
1812 'searrow;': '\u2198',
1816 'seswar;': '\u2929',
1817 'setminus;': '\u2216',
1820 'Sfr;': '\U0001d516',
1821 'sfr;': '\U0001d530',
1822 'sfrown;': '\u2322',
1824 'SHCHcy;': '\u0429',
1825 'shchcy;': '\u0449',
1828 'ShortDownArrow;': '\u2193',
1829 'ShortLeftArrow;': '\u2190',
1830 'shortmid;': '\u2223',
1831 'shortparallel;': '\u2225',
1832 'ShortRightArrow;': '\u2192',
1833 'ShortUpArrow;': '\u2191',
1838 'sigmaf;': '\u03c2',
1839 'sigmav;': '\u03c2',
1841 'simdot;': '\u2a6a',
1849 'simplus;': '\u2a24',
1850 'simrarr;': '\u2972',
1852 'SmallCircle;': '\u2218',
1853 'smallsetminus;': '\u2216',
1854 'smashp;': '\u2a33',
1855 'smeparsl;': '\u29e4',
1860 'smtes;': '\u2aac\ufe00',
1861 'SOFTcy;': '\u042c',
1862 'softcy;': '\u044c',
1865 'solbar;': '\u233f',
1866 'Sopf;': '\U0001d54a',
1867 'sopf;': '\U0001d564',
1868 'spades;': '\u2660',
1869 'spadesuit;': '\u2660',
1872 'sqcaps;': '\u2293\ufe00',
1874 'sqcups;': '\u2294\ufe00',
1877 'sqsube;': '\u2291',
1878 'sqsubset;': '\u228f',
1879 'sqsubseteq;': '\u2291',
1881 'sqsupe;': '\u2292',
1882 'sqsupset;': '\u2290',
1883 'sqsupseteq;': '\u2292',
1885 'Square;': '\u25a1',
1886 'square;': '\u25a1',
1887 'SquareIntersection;': '\u2293',
1888 'SquareSubset;': '\u228f',
1889 'SquareSubsetEqual;': '\u2291',
1890 'SquareSuperset;': '\u2290',
1891 'SquareSupersetEqual;': '\u2292',
1892 'SquareUnion;': '\u2294',
1893 'squarf;': '\u25aa',
1896 'Sscr;': '\U0001d4ae',
1897 'sscr;': '\U0001d4c8',
1898 'ssetmn;': '\u2216',
1899 'ssmile;': '\u2323',
1900 'sstarf;': '\u22c6',
1904 'straightepsilon;': '\u03f5',
1905 'straightphi;': '\u03d5',
1909 'subdot;': '\u2abd',
1912 'subedot;': '\u2ac3',
1913 'submult;': '\u2ac1',
1916 'subplus;': '\u2abf',
1917 'subrarr;': '\u2979',
1918 'Subset;': '\u22d0',
1919 'subset;': '\u2282',
1920 'subseteq;': '\u2286',
1921 'subseteqq;': '\u2ac5',
1922 'SubsetEqual;': '\u2286',
1923 'subsetneq;': '\u228a',
1924 'subsetneqq;': '\u2acb',
1925 'subsim;': '\u2ac7',
1926 'subsub;': '\u2ad5',
1927 'subsup;': '\u2ad3',
1929 'succapprox;': '\u2ab8',
1930 'succcurlyeq;': '\u227d',
1931 'Succeeds;': '\u227b',
1932 'SucceedsEqual;': '\u2ab0',
1933 'SucceedsSlantEqual;': '\u227d',
1934 'SucceedsTilde;': '\u227f',
1935 'succeq;': '\u2ab0',
1936 'succnapprox;': '\u2aba',
1937 'succneqq;': '\u2ab6',
1938 'succnsim;': '\u22e9',
1939 'succsim;': '\u227f',
1940 'SuchThat;': '\u220b',
1952 'supdot;': '\u2abe',
1953 'supdsub;': '\u2ad8',
1956 'supedot;': '\u2ac4',
1957 'Superset;': '\u2283',
1958 'SupersetEqual;': '\u2287',
1959 'suphsol;': '\u27c9',
1960 'suphsub;': '\u2ad7',
1961 'suplarr;': '\u297b',
1962 'supmult;': '\u2ac2',
1965 'supplus;': '\u2ac0',
1966 'Supset;': '\u22d1',
1967 'supset;': '\u2283',
1968 'supseteq;': '\u2287',
1969 'supseteqq;': '\u2ac6',
1970 'supsetneq;': '\u228b',
1971 'supsetneqq;': '\u2acc',
1972 'supsim;': '\u2ac8',
1973 'supsub;': '\u2ad4',
1974 'supsup;': '\u2ad6',
1975 'swarhk;': '\u2926',
1978 'swarrow;': '\u2199',
1979 'swnwar;': '\u292a',
1983 'target;': '\u2316',
1987 'Tcaron;': '\u0164',
1988 'tcaron;': '\u0165',
1989 'Tcedil;': '\u0162',
1990 'tcedil;': '\u0163',
1994 'telrec;': '\u2315',
1995 'Tfr;': '\U0001d517',
1996 'tfr;': '\U0001d531',
1997 'there4;': '\u2234',
1998 'Therefore;': '\u2234',
1999 'therefore;': '\u2234',
2002 'thetasym;': '\u03d1',
2003 'thetav;': '\u03d1',
2004 'thickapprox;': '\u2248',
2005 'thicksim;': '\u223c',
2006 'ThickSpace;': '\u205f\u200a',
2007 'thinsp;': '\u2009',
2008 'ThinSpace;': '\u2009',
2010 'thksim;': '\u223c',
2017 'TildeEqual;': '\u2243',
2018 'TildeFullEqual;': '\u2245',
2019 'TildeTilde;': '\u2248',
2022 'timesb;': '\u22a0',
2023 'timesbar;': '\u2a31',
2024 'timesd;': '\u2a30',
2028 'topbot;': '\u2336',
2029 'topcir;': '\u2af1',
2030 'Topf;': '\U0001d54b',
2031 'topf;': '\U0001d565',
2032 'topfork;': '\u2ada',
2034 'tprime;': '\u2034',
2037 'triangle;': '\u25b5',
2038 'triangledown;': '\u25bf',
2039 'triangleleft;': '\u25c3',
2040 'trianglelefteq;': '\u22b4',
2041 'triangleq;': '\u225c',
2042 'triangleright;': '\u25b9',
2043 'trianglerighteq;': '\u22b5',
2044 'tridot;': '\u25ec',
2046 'triminus;': '\u2a3a',
2047 'TripleDot;': '\u20db',
2048 'triplus;': '\u2a39',
2050 'tritime;': '\u2a3b',
2051 'trpezium;': '\u23e2',
2052 'Tscr;': '\U0001d4af',
2053 'tscr;': '\U0001d4c9',
2058 'Tstrok;': '\u0166',
2059 'tstrok;': '\u0167',
2061 'twoheadleftarrow;': '\u219e',
2062 'twoheadrightarrow;': '\u21a0',
2070 'Uarrocir;': '\u2949',
2073 'Ubreve;': '\u016c',
2074 'ubreve;': '\u016d',
2082 'Udblac;': '\u0170',
2083 'udblac;': '\u0171',
2085 'ufisht;': '\u297e',
2086 'Ufr;': '\U0001d518',
2087 'ufr;': '\U0001d532',
2096 'ulcorn;': '\u231c',
2097 'ulcorner;': '\u231c',
2098 'ulcrop;': '\u230f',
2105 'UnderBrace;': '\u23df',
2106 'UnderBracket;': '\u23b5',
2107 'UnderParenthesis;': '\u23dd',
2109 'UnionPlus;': '\u228e',
2112 'Uopf;': '\U0001d54c',
2113 'uopf;': '\U0001d566',
2114 'UpArrow;': '\u2191',
2115 'Uparrow;': '\u21d1',
2116 'uparrow;': '\u2191',
2117 'UpArrowBar;': '\u2912',
2118 'UpArrowDownArrow;': '\u21c5',
2119 'UpDownArrow;': '\u2195',
2120 'Updownarrow;': '\u21d5',
2121 'updownarrow;': '\u2195',
2122 'UpEquilibrium;': '\u296e',
2123 'upharpoonleft;': '\u21bf',
2124 'upharpoonright;': '\u21be',
2126 'UpperLeftArrow;': '\u2196',
2127 'UpperRightArrow;': '\u2197',
2131 'Upsilon;': '\u03a5',
2132 'upsilon;': '\u03c5',
2134 'UpTeeArrow;': '\u21a5',
2135 'upuparrows;': '\u21c8',
2136 'urcorn;': '\u231d',
2137 'urcorner;': '\u231d',
2138 'urcrop;': '\u230e',
2142 'Uscr;': '\U0001d4b0',
2143 'uscr;': '\U0001d4ca',
2145 'Utilde;': '\u0168',
2146 'utilde;': '\u0169',
2154 'uwangle;': '\u29a7',
2155 'vangrt;': '\u299c',
2156 'varepsilon;': '\u03f5',
2157 'varkappa;': '\u03f0',
2158 'varnothing;': '\u2205',
2159 'varphi;': '\u03d5',
2161 'varpropto;': '\u221d',
2164 'varrho;': '\u03f1',
2165 'varsigma;': '\u03c2',
2166 'varsubsetneq;': '\u228a\ufe00',
2167 'varsubsetneqq;': '\u2acb\ufe00',
2168 'varsupsetneq;': '\u228b\ufe00',
2169 'varsupsetneqq;': '\u2acc\ufe00',
2170 'vartheta;': '\u03d1',
2171 'vartriangleleft;': '\u22b2',
2172 'vartriangleright;': '\u22b3',
2182 'Vdashl;': '\u2ae6',
2185 'veebar;': '\u22bb',
2187 'vellip;': '\u22ee',
2188 'Verbar;': '\u2016',
2192 'VerticalBar;': '\u2223',
2193 'VerticalLine;': '|',
2194 'VerticalSeparator;': '\u2758',
2195 'VerticalTilde;': '\u2240',
2196 'VeryThinSpace;': '\u200a',
2197 'Vfr;': '\U0001d519',
2198 'vfr;': '\U0001d533',
2200 'vnsub;': '\u2282\u20d2',
2201 'vnsup;': '\u2283\u20d2',
2202 'Vopf;': '\U0001d54d',
2203 'vopf;': '\U0001d567',
2206 'Vscr;': '\U0001d4b1',
2207 'vscr;': '\U0001d4cb',
2208 'vsubnE;': '\u2acb\ufe00',
2209 'vsubne;': '\u228a\ufe00',
2210 'vsupnE;': '\u2acc\ufe00',
2211 'vsupne;': '\u228b\ufe00',
2212 'Vvdash;': '\u22aa',
2213 'vzigzag;': '\u299a',
2216 'wedbar;': '\u2a5f',
2219 'wedgeq;': '\u2259',
2220 'weierp;': '\u2118',
2221 'Wfr;': '\U0001d51a',
2222 'wfr;': '\U0001d534',
2223 'Wopf;': '\U0001d54e',
2224 'wopf;': '\U0001d568',
2227 'wreath;': '\u2240',
2228 'Wscr;': '\U0001d4b2',
2229 'wscr;': '\U0001d4cc',
2234 'Xfr;': '\U0001d51b',
2235 'xfr;': '\U0001d535',
2245 'Xopf;': '\U0001d54f',
2246 'xopf;': '\U0001d569',
2247 'xoplus;': '\u2a01',
2248 'xotime;': '\u2a02',
2251 'Xscr;': '\U0001d4b3',
2252 'xscr;': '\U0001d4cd',
2253 'xsqcup;': '\u2a06',
2254 'xuplus;': '\u2a04',
2257 'xwedge;': '\u22c0',
2270 'Yfr;': '\U0001d51c',
2271 'yfr;': '\U0001d536',
2274 'Yopf;': '\U0001d550',
2275 'yopf;': '\U0001d56a',
2276 'Yscr;': '\U0001d4b4',
2277 'yscr;': '\U0001d4ce',
2283 'Zacute;': '\u0179',
2284 'zacute;': '\u017a',
2285 'Zcaron;': '\u017d',
2286 'zcaron;': '\u017e',
2291 'zeetrf;': '\u2128',
2292 'ZeroWidthSpace;': '\u200b',
2296 'zfr;': '\U0001d537',
2299 'zigrarr;': '\u21dd',
2301 'zopf;': '\U0001d56b',
2302 'Zscr;': '\U0001d4b5',
2303 'zscr;': '\U0001d4cf',
2309 import http
.client
as compat_http_client
2310 except ImportError: # Python 2
2311 import httplib
as compat_http_client
2314 from urllib
.error
import HTTPError
as compat_HTTPError
2315 except ImportError: # Python 2
2316 from urllib2
import HTTPError
as compat_HTTPError
2319 from urllib
.request
import urlretrieve
as compat_urlretrieve
2320 except ImportError: # Python 2
2321 from urllib
import urlretrieve
as compat_urlretrieve
2324 from html
.parser
import HTMLParser
as compat_HTMLParser
2325 except ImportError: # Python 2
2326 from HTMLParser
import HTMLParser
as compat_HTMLParser
2329 from HTMLParser
import HTMLParseError
as compat_HTMLParseError
2330 except ImportError: # Python <3.4
2332 from html
.parser
import HTMLParseError
as compat_HTMLParseError
2333 except ImportError: # Python >3.4
2335 # HTMLParseError has been deprecated in Python 3.3 and removed in
2336 # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
2337 # and uniform cross-version exception handling
2338 class compat_HTMLParseError(Exception):
2342 from subprocess
import DEVNULL
2343 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
2345 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
2348 import http
.server
as compat_http_server
2350 import BaseHTTPServer
as compat_http_server
2353 compat_str
= unicode # Python 2
2358 from urllib
.parse
import unquote_to_bytes
as compat_urllib_parse_unquote_to_bytes
2359 from urllib
.parse
import unquote
as compat_urllib_parse_unquote
2360 from urllib
.parse
import unquote_plus
as compat_urllib_parse_unquote_plus
2361 except ImportError: # Python 2
2362 _asciire
= (compat_urllib_parse
._asciire
if hasattr(compat_urllib_parse
, '_asciire')
2363 else re
.compile(r
'([\x00-\x7f]+)'))
2365 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2366 # implementations from cpython 3.4.3's stdlib. Python 2's version
2367 # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
2369 def compat_urllib_parse_unquote_to_bytes(string
):
2370 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2371 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2372 # unescaped non-ASCII characters, which URIs should not.
2374 # Is it a string-like object?
2377 if isinstance(string
, compat_str
):
2378 string
= string
.encode('utf-8')
2379 bits
= string
.split(b
'%')
2384 for item
in bits
[1:]:
2386 append(compat_urllib_parse
._hextochr
[item
[:2]])
2391 return b
''.join(res
)
2393 def compat_urllib_parse_unquote(string
, encoding
='utf-8', errors
='replace'):
2394 """Replace %xx escapes by their single-character equivalent. The optional
2395 encoding and errors parameters specify how to decode percent-encoded
2396 sequences into Unicode characters, as accepted by the bytes.decode()
2398 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2399 sequences are replaced by a placeholder character.
2401 unquote('abc%20def') -> 'abc def'.
2403 if '%' not in string
:
2406 if encoding
is None:
2410 bits
= _asciire
.split(string
)
2413 for i
in range(1, len(bits
), 2):
2414 append(compat_urllib_parse_unquote_to_bytes(bits
[i
]).decode(encoding
, errors
))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Unquote a form-encoded value: '+' becomes a space, then %xx escapes
    are decoded by compat_urllib_parse_unquote().

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Plus signs are translated before percent-decoding, so an escaped
    # '%2B' still comes out as a literal '+'.
    return compat_urllib_parse_unquote(string.replace('+', ' '), encoding, errors)
2428 from urllib
.parse
import urlencode
as compat_urllib_parse_urlencode
2429 except ImportError: # Python 2
2430 # Python 2 will choke in urlencode on mixture of byte and unicode strings.
2431 # Possible solutions are to either port it from python 3 with all
2432 # the friends or manually ensure input query contains only byte strings.
2433 # We will stick with latter thus recursively encoding the whole query.
2434 def compat_urllib_parse_urlencode(query
, doseq
=0, encoding
='utf-8'):
2436 if isinstance(e
, dict):
2438 elif isinstance(e
, (list, tuple,)):
2439 list_e
= encode_list(e
)
2440 e
= tuple(list_e
) if isinstance(e
, tuple) else list_e
2441 elif isinstance(e
, compat_str
):
2442 e
= e
.encode(encoding
)
2446 return dict((encode_elem(k
), encode_elem(v
)) for k
, v
in d
.items())
2449 return [encode_elem(e
) for e
in l
]
2451 return compat_urllib_parse
.urlencode(encode_elem(query
), doseq
=doseq
)
2454 from urllib
.request
import DataHandler
as compat_urllib_request_DataHandler
2455 except ImportError: # Python < 3.4
2456 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2457 class compat_urllib_request_DataHandler(compat_urllib_request
.BaseHandler
):
2458 def data_open(self
, req
):
2459 # data URLs as specified in RFC 2397.
2461 # ignores POSTed data
2464 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2465 # mediatype := [ type "/" subtype ] *( ";" parameter )
2467 # parameter := attribute "=" value
2468 url
= req
.get_full_url()
2470 scheme
, data
= url
.split(':', 1)
2471 mediatype
, data
= data
.split(',', 1)
2473 # even base64 encoded data URLs might be quoted so unquote in any case:
2474 data
= compat_urllib_parse_unquote_to_bytes(data
)
2475 if mediatype
.endswith(';base64'):
2476 data
= binascii
.a2b_base64(data
)
2477 mediatype
= mediatype
[:-7]
2480 mediatype
= 'text/plain;charset=US-ASCII'
2482 headers
= email
.message_from_string(
2483 'Content-type: %s\nContent-length: %d\n' % (mediatype
, len(data
)))
2485 return compat_urllib_response
.addinfourl(io
.BytesIO(data
), headers
, url
)
2488 compat_basestring
= basestring
# Python 2
2490 compat_basestring
= str
2493 compat_chr
= unichr # Python 2
2498 from xml
.etree
.ElementTree
import ParseError
as compat_xml_parse_error
2499 except ImportError: # Python 2.6
2500 from xml
.parsers
.expat
import ExpatError
as compat_xml_parse_error
2503 etree
= xml
.etree
.ElementTree
2506 class _TreeBuilder(etree
.TreeBuilder
):
2507 def doctype(self
, name
, pubid
, system
):
2511 if sys
.version_info
[0] >= 3:
def compat_etree_fromstring(text):
    """Parse *text* as XML with a parser whose target is a fresh _TreeBuilder."""
    builder = _TreeBuilder()
    xml_parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=xml_parser)
2515 # python 2.x tries to encode unicode strings with ascii (see the
2516 # XMLParser._fixtext method)
2518 _etree_iter
= etree
.Element
.iter
2519 except AttributeError: # Python <=2.6
2520 def _etree_iter(root
):
2521 for el
in root
.findall('*'):
2523 for sub
in _etree_iter(el
):
2526 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2528 def _XML(text
, parser
=None):
2530 parser
= etree
.XMLParser(target
=_TreeBuilder())
2532 return parser
.close()
2534 def _element_factory(*args
, **kwargs
):
2535 el
= etree
.Element(*args
, **kwargs
)
2536 for k
, v
in el
.items():
2537 if isinstance(v
, bytes):
2538 el
.set(k
, v
.decode('utf-8'))
2541 def compat_etree_fromstring(text
):
2542 doc
= _XML(text
, parser
=etree
.XMLParser(target
=_TreeBuilder(element_factory
=_element_factory
)))
2543 for el
in _etree_iter(doc
):
2544 if el
.text
is not None and isinstance(el
.text
, bytes):
2545 el
.text
= el
.text
.decode('utf-8')
2548 if hasattr(etree
, 'register_namespace'):
2549 compat_etree_register_namespace
= etree
.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global: any existing mapping for either the given
    prefix or the namespace URI is removed before the new one is stored.
    *prefix* is the namespace prefix, *uri* is a namespace uri.
    ValueError is raised if the prefix has the reserved "ns<digits>" form.
    """
    if re.match(r"ns\d+$", prefix) is not None:
        raise ValueError("Prefix format reserved for internal use")
    registry = etree._namespace_map
    # Collect the conflicting keys first so the mapping is not resized
    # while it is being iterated.
    stale_uris = [
        known_uri for known_uri, known_prefix in registry.items()
        if known_uri == uri or known_prefix == prefix]
    for known_uri in stale_uris:
        del registry[known_uri]
    registry[uri] = prefix
2566 if sys
.version_info
< (2, 7):
2567 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2568 # .//node does not match if a node is a direct child of . !
def compat_xpath(xpath):
    """Return *xpath* encoded to ASCII bytes when it is a text string,
    otherwise return it unchanged."""
    return xpath.encode('ascii') if isinstance(xpath, compat_str) else xpath
2574 compat_xpath
= lambda xpath
: xpath
2577 from urllib
.parse
import parse_qs
as compat_parse_qs
2578 except ImportError: # Python 2
2579 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2580 # Python 2's version is apparently totally broken
2582 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
2583 encoding
='utf-8', errors
='replace'):
2584 qs
, _coerce_result
= qs
, compat_str
2585 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
2587 for name_value
in pairs
:
2588 if not name_value
and not strict_parsing
:
2590 nv
= name_value
.split('=', 1)
2593 raise ValueError('bad query field: %r' % (name_value
,))
2594 # Handle case of a control-name with no equal sign
2595 if keep_blank_values
:
2599 if len(nv
[1]) or keep_blank_values
:
2600 name
= nv
[0].replace('+', ' ')
2601 name
= compat_urllib_parse_unquote(
2602 name
, encoding
=encoding
, errors
=errors
)
2603 name
= _coerce_result(name
)
2604 value
= nv
[1].replace('+', ' ')
2605 value
= compat_urllib_parse_unquote(
2606 value
, encoding
=encoding
, errors
=errors
)
2607 value
= _coerce_result(value
)
2608 r
.append((name
, value
))
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse a query string into a dict mapping each name to the list of
    its values, in the order _parse_qsl() yields them."""
    parsed_result = {}
    for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                  encoding=encoding, errors=errors):
        # The first occurrence creates the list; later ones extend it.
        parsed_result.setdefault(name, []).append(value)
    return parsed_result
2624 compat_os_name
= os
._name
if os
.name
== 'java' else os
.name
2627 if compat_os_name
== 'nt':
def compat_shlex_quote(s):
    """Quote *s* for the 'nt' branch: strings made only of word characters,
    '-', '_', '.' and '/' are returned as-is; anything else is wrapped in
    double quotes with embedded double quotes backslash-escaped."""
    if re.match(r'^[-_\w./]+$', s):
        return s
    return '"%s"' % s.replace('"', '\\"')
2632 from shlex
import quote
as compat_shlex_quote
2633 except ImportError: # Python < 3.3
def compat_shlex_quote(s):
    """Fallback for shlex.quote: strings made only of word characters, '-',
    '_', '.' and '/' are returned as-is; anything else is wrapped in single
    quotes, with each embedded single quote rewritten as '"'"'."""
    is_safe = re.match(r'^[-_\w./]+$', s)
    return s if is_safe else "'" + s.replace("'", "'\"'\"'") + "'"
2642 args
= shlex
.split('äøę')
2643 assert (isinstance(args
, list) and
2644 isinstance(args
[0], compat_str
) and
2645 args
[0] == 'äøę')
2646 compat_shlex_split
= shlex
.split
2647 except (AssertionError, UnicodeEncodeError):
2648 # Working around shlex issue with unicode strings on some python 2
2649 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split() on UTF-8 bytes and return the tokens
    decoded back to text.

    Used when shlex.split() mishandles unicode strings
    (see http://bugs.python.org/issue1548891).
    """
    if isinstance(s, compat_str):
        s = s.encode('utf-8')
    return [token.decode('utf-8') for token in shlex.split(s, comments, posix)]
2663 if sys
.version_info
>= (3, 0):
2664 compat_getenv
= os
.getenv
2665 compat_expanduser
= os
.path
.expanduser
2667 def compat_setenv(key
, value
, env
=os
.environ
):
2670 # Environment variables should be decoded with filesystem encoding.
2671 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2673 def compat_getenv(key
, default
=None):
2674 from .utils
import get_filesystem_encoding
2675 env
= os
.getenv(key
, default
)
2677 env
= env
.decode(get_filesystem_encoding())
2680 def compat_setenv(key
, value
, env
=os
.environ
):
2682 from .utils
import get_filesystem_encoding
2683 return v
.encode(get_filesystem_encoding()) if isinstance(v
, compat_str
) else v
2684 env
[encode(key
)] = encode(value
)
2686 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2687 # environment variables with filesystem encoding. We will work around this by
2688 # providing adjusted implementations.
2689 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2690 # for different platforms with correct environment variables decoding.
2692 if compat_os_name
== 'posix':
2693 def compat_expanduser(path
):
2694 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2696 if not path
.startswith('~'):
2698 i
= path
.find('/', 1)
2702 if 'HOME' not in os
.environ
:
2704 userhome
= pwd
.getpwuid(os
.getuid()).pw_dir
2706 userhome
= compat_getenv('HOME')
2710 pwent
= pwd
.getpwnam(path
[1:i
])
2713 userhome
= pwent
.pw_dir
2714 userhome
= userhome
.rstrip('/')
2715 return (userhome
+ path
[i
:]) or '/'
2716 elif compat_os_name
in ('nt', 'ce'):
2717 def compat_expanduser(path
):
2718 """Expand ~ and ~user constructs.
2720 If user or $HOME is unknown, do nothing."""
2724 while i
< n
and path
[i
] not in '/\\':
2727 if 'HOME' in os
.environ
:
2728 userhome
= compat_getenv('HOME')
2729 elif 'USERPROFILE' in os
.environ
:
2730 userhome
= compat_getenv('USERPROFILE')
2731 elif 'HOMEPATH' not in os
.environ
:
2735 drive
= compat_getenv('HOMEDRIVE')
2738 userhome
= os
.path
.join(drive
, compat_getenv('HOMEPATH'))
2741 userhome
= os
.path
.join(os
.path
.dirname(userhome
), path
[1:i
])
2743 return userhome
+ path
[i
:]
2745 compat_expanduser
= os
.path
.expanduser
2748 if sys
.version_info
< (3, 0):
def compat_print(s):
    """Print *s* encoded with the preferred encoding; characters that cannot
    be encoded are written as XML character references."""
    from .utils import preferredencoding
    encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
    print(encoded)
2753 def compat_print(s
):
2754 assert isinstance(s
, compat_str
)
2758 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass(), first encoding a text *prompt* with the
    preferred encoding; any other prompt is passed through unchanged."""
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    from .utils import preferredencoding
    encoded_prompt = prompt.encode(preferredencoding())
    return getpass.getpass(encoded_prompt, *args, **kwargs)
2765 compat_getpass
= getpass
.getpass
2768 compat_input
= raw_input
2769 except NameError: # Python 3
2770 compat_input
= input
2772 # Python < 2.6.5 require kwargs to be bytes
2776 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a copy of *kwargs* whose keys have been passed through bytes().

    Python < 2.6.5 requires keyword-argument names to be bytes.
    """
    converted = {}
    for key, value in kwargs.items():
        converted[bytes(key)] = value
    return converted
2781 compat_kwargs
= lambda kwargs
: kwargs
2785 compat_numeric_types
= (int, float, long, complex)
2786 except NameError: # Python 3
2787 compat_numeric_types
= (int, float, complex)
2791 compat_integer_types
= (int, long)
2792 except NameError: # Python 3
2793 compat_integer_types
= (int, )
2796 if sys
.version_info
< (2, 7):
2797 def compat_socket_create_connection(address
, timeout
, source_address
=None):
2798 host
, port
= address
2800 for res
in socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
):
2801 af
, socktype
, proto
, canonname
, sa
= res
2804 sock
= socket
.socket(af
, socktype
, proto
)
2805 sock
.settimeout(timeout
)
2807 sock
.bind(source_address
)
2810 except socket
.error
as _
:
2812 if sock
is not None:
2817 raise socket
.error('getaddrinfo returns an empty list')
2819 compat_socket_create_connection
= socket
.create_connection
2822 # Fix https://github.com/rg3/youtube-dl/issues/4223
2823 # See http://bugs.python.org/issue9161 for what is broken
2824 def workaround_optparse_bug9161():
2825 op
= optparse
.OptionParser()
2826 og
= optparse
.OptionGroup(op
, 'foo')
2830 real_add_option
= optparse
.OptionGroup
.add_option
2832 def _compat_add_option(self
, *args
, **kwargs
):
2834 v
.encode('ascii', 'replace') if isinstance(v
, compat_str
)
2836 bargs
= [enc(a
) for a
in args
]
2838 (k
, enc(v
)) for k
, v
in kwargs
.items())
2839 return real_add_option(self
, *bargs
, **bkwargs
)
2840 optparse
.OptionGroup
.add_option
= _compat_add_option
2843 if hasattr(shutil
, 'get_terminal_size'): # Python >= 3.3
2844 compat_get_terminal_size
= shutil
.get_terminal_size
2846 _terminal_size
= collections
.namedtuple('terminal_size', ['columns', 'lines'])
2848 def compat_get_terminal_size(fallback
=(80, 24)):
2849 columns
= compat_getenv('COLUMNS')
2851 columns
= int(columns
)
2854 lines
= compat_getenv('LINES')
2860 if columns
is None or lines
is None or columns
<= 0 or lines
<= 0:
2862 sp
= subprocess
.Popen(
2864 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2865 out
, err
= sp
.communicate()
2866 _lines
, _columns
= map(int, out
.split())
2868 _columns
, _lines
= _terminal_size(*fallback
)
2870 if columns
is None or columns
<= 0:
2872 if lines
is None or lines
<= 0:
2874 return _terminal_size(columns
, lines
)
2877 itertools
.count(start
=0, step
=1)
2878 compat_itertools_count
= itertools
.count
2879 except TypeError: # Python 2.6
2880 def compat_itertools_count(start
=0, step
=1):
2886 if sys
.version_info
>= (3, 0):
2887 from tokenize
import tokenize
as compat_tokenize_tokenize
2889 from tokenize
import generate_tokens
as compat_tokenize_tokenize
2893 struct
.pack('!I', 0)
2895 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2896 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """struct.pack() wrapper that encodes a text *spec* to ASCII bytes first.

    In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes format
    (see https://bugs.python.org/issue19099).
    """
    byte_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(byte_spec, *args)
def compat_struct_unpack(spec, *args):
    """struct.unpack() wrapper that encodes a text *spec* to ASCII bytes first."""
    byte_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(byte_spec, *args)
class compat_Struct(struct.Struct):
    """struct.Struct subclass that accepts a text format string."""

    def __init__(self, fmt):
        # A text format is encoded to ASCII bytes before reaching the base class.
        byte_fmt = fmt.encode('ascii') if isinstance(fmt, compat_str) else fmt
        super(compat_Struct, self).__init__(byte_fmt)
2913 compat_struct_pack
= struct
.pack
2914 compat_struct_unpack
= struct
.unpack
2915 if platform
.python_implementation() == 'IronPython' and sys
.version_info
< (2, 7, 8):
class compat_Struct(struct.Struct):
    """struct.Struct variant whose unpack() always hands the base class a buffer."""

    def unpack(self, string):
        # Anything that is not already a buffer is wrapped in one.
        data = string if isinstance(string, buffer) else buffer(string)  # noqa: F821
        return super(compat_Struct, self).unpack(data)
2922 compat_Struct
= struct
.Struct
2926 from future_builtins
import zip as compat_zip
2927 except ImportError: # not 2.6+ or is 3.x
2929 from itertools
import izip
as compat_zip
# < 2.5 or 3.x
2934 if sys
.version_info
< (3, 3):
def compat_b64decode(s, *args, **kwargs):
    """base64.b64decode() wrapper that encodes a text *s* to ASCII bytes first;
    extra arguments are forwarded unchanged."""
    payload = s.encode('ascii') if isinstance(s, compat_str) else s
    return base64.b64decode(payload, *args, **kwargs)
2940 compat_b64decode
= base64
.b64decode
2943 if platform
.python_implementation() == 'PyPy' and sys
.pypy_version_info
< (5, 4, 0):
2944 # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
2945 # names, see the original PyPy issue [1] and the youtube-dl one [2].
2946 # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
2947 # 2. https://github.com/rg3/youtube-dl/pull/4392
2948 def compat_ctypes_WINFUNCTYPE(*args
, **kwargs
):
2949 real
= ctypes
.WINFUNCTYPE(*args
, **kwargs
)
2951 def resf(tpl
, *args
, **kwargs
):
2953 return real((str(funcname
), dll
), *args
, **kwargs
)
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Forward all arguments to ctypes.WINFUNCTYPE and return its result."""
    make_prototype = ctypes.WINFUNCTYPE
    return make_prototype(*args, **kwargs)
2962 'compat_HTMLParseError',
2963 'compat_HTMLParser',
2967 'compat_basestring',
2971 'compat_ctypes_WINFUNCTYPE',
2972 'compat_etree_fromstring',
2973 'compat_etree_register_namespace',
2974 'compat_expanduser',
2975 'compat_get_terminal_size',
2978 'compat_html_entities',
2979 'compat_html_entities_html5',
2980 'compat_http_client',
2981 'compat_http_server',
2983 'compat_integer_types',
2984 'compat_itertools_count',
2986 'compat_numeric_types',
2992 'compat_shlex_quote',
2993 'compat_shlex_split',
2994 'compat_socket_create_connection',
2996 'compat_struct_pack',
2997 'compat_struct_unpack',
2998 'compat_subprocess_get_DEVNULL',
2999 'compat_tokenize_tokenize',
3000 'compat_urllib_error',
3001 'compat_urllib_parse',
3002 'compat_urllib_parse_unquote',
3003 'compat_urllib_parse_unquote_plus',
3004 'compat_urllib_parse_unquote_to_bytes',
3005 'compat_urllib_parse_urlencode',
3006 'compat_urllib_parse_urlparse',
3007 'compat_urllib_request',
3008 'compat_urllib_request_DataHandler',
3009 'compat_urllib_response',
3011 'compat_urlretrieve',
3012 'compat_xml_parse_error',
3015 'workaround_optparse_bug9161',