1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
# Proposed HTML entity definitions, taken from
# http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_14.html
#
# entitydefs maps each entity name (without the leading '&' and trailing
# ';') to the one-character string it stands for.  The names below are
# listed in code order and paired, position by position, with
# chr(160) .. chr(255).  The position of a name therefore determines its
# character: do not reorder, insert, or drop a name without adjusting the
# range at the bottom (96 names <-> 96 codes).
entitydefs = dict(zip(
    (
        'nbsp',     # 160  no-break space
        'iexcl',    # 161  inverted exclamation mark
        'cent',     # 162  cent sign
        'pound',    # 163  pound sterling sign
        'curren',   # 164  general currency sign
        'yen',      # 165  yen sign
        'brvbar',   # 166  broken (vertical) bar
        'sect',     # 167  section sign
        'uml',      # 168  umlaut (dieresis)
        'copy',     # 169  copyright sign
        'ordf',     # 170  ordinal indicator, feminine
        'laquo',    # 171  angle quotation mark, left
        'not',      # 172  not sign
        'shy',      # 173  soft hyphen
        'reg',      # 174  registered sign
        'macr',     # 175  macron
        'deg',      # 176  degree sign
        'plusmn',   # 177  plus-or-minus sign
        'sup2',     # 178  superscript two
        'sup3',     # 179  superscript three
        'acute',    # 180  acute accent
        'micro',    # 181  micro sign
        'para',     # 182  pilcrow (paragraph sign)
        'middot',   # 183  middle dot
        'cedil',    # 184  cedilla
        'sup1',     # 185  superscript one
        'ordm',     # 186  ordinal indicator, masculine
        'raquo',    # 187  angle quotation mark, right
        'frac14',   # 188  fraction one-quarter
        'frac12',   # 189  fraction one-half
        'frac34',   # 190  fraction three-quarters
        'iquest',   # 191  inverted question mark
        'Agrave',   # 192  capital A, grave accent
        'Aacute',   # 193  capital A, acute accent
        'Acirc',    # 194  capital A, circumflex accent
        'Atilde',   # 195  capital A, tilde
        'Auml',     # 196  capital A, dieresis or umlaut mark
        'Aring',    # 197  capital A, ring
        'AElig',    # 198  capital AE diphthong (ligature)
        'Ccedil',   # 199  capital C, cedilla
        'Egrave',   # 200  capital E, grave accent
        'Eacute',   # 201  capital E, acute accent
        'Ecirc',    # 202  capital E, circumflex accent
        'Euml',     # 203  capital E, dieresis or umlaut mark
        'Igrave',   # 204  capital I, grave accent
        'Iacute',   # 205  capital I, acute accent
        'Icirc',    # 206  capital I, circumflex accent
        'Iuml',     # 207  capital I, dieresis or umlaut mark
        'ETH',      # 208  capital Eth, Icelandic
        'Ntilde',   # 209  capital N, tilde
        'Ograve',   # 210  capital O, grave accent
        'Oacute',   # 211  capital O, acute accent
        'Ocirc',    # 212  capital O, circumflex accent
        'Otilde',   # 213  capital O, tilde
        'Ouml',     # 214  capital O, dieresis or umlaut mark
        'times',    # 215  multiply sign
        'Oslash',   # 216  capital O, slash
        'Ugrave',   # 217  capital U, grave accent
        'Uacute',   # 218  capital U, acute accent
        'Ucirc',    # 219  capital U, circumflex accent
        'Uuml',     # 220  capital U, dieresis or umlaut mark
        'Yacute',   # 221  capital Y, acute accent
        'THORN',    # 222  capital THORN, Icelandic
        'szlig',    # 223  small sharp s, German (sz ligature)
        'agrave',   # 224  small a, grave accent
        'aacute',   # 225  small a, acute accent
        'acirc',    # 226  small a, circumflex accent
        'atilde',   # 227  small a, tilde
        'auml',     # 228  small a, dieresis or umlaut mark
        'aring',    # 229  small a, ring
        'aelig',    # 230  small ae diphthong (ligature)
        'ccedil',   # 231  small c, cedilla
        'egrave',   # 232  small e, grave accent
        'eacute',   # 233  small e, acute accent
        'ecirc',    # 234  small e, circumflex accent
        'euml',     # 235  small e, dieresis or umlaut mark
        'igrave',   # 236  small i, grave accent
        'iacute',   # 237  small i, acute accent
        'icirc',    # 238  small i, circumflex accent
        'iuml',     # 239  small i, dieresis or umlaut mark
        'eth',      # 240  small eth, Icelandic
        'ntilde',   # 241  small n, tilde
        'ograve',   # 242  small o, grave accent
        'oacute',   # 243  small o, acute accent
        'ocirc',    # 244  small o, circumflex accent
        'otilde',   # 245  small o, tilde
        'ouml',     # 246  small o, dieresis or umlaut mark
        'divide',   # 247  divide sign
        'oslash',   # 248  small o, slash
        'ugrave',   # 249  small u, grave accent
        'uacute',   # 250  small u, acute accent
        'ucirc',    # 251  small u, circumflex accent
        'uuml',     # 252  small u, dieresis or umlaut mark
        'yacute',   # 253  small y, acute accent
        'thorn',    # 254  small thorn, Icelandic
        'yuml',     # 255  small y, dieresis or umlaut mark
    ),
    map(chr, range(160, 256))
))
|