diff options
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- | Lib/sgmllib.py | 22 |
1 files changed, 8 insertions, 14 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index a4f0a8b..b259328 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -30,7 +30,6 @@ shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
 shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
 piclose = re.compile('>')
 endbracket = re.compile('[<>]')
-commentclose = re.compile(r'--\s*>')
 tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
@@ -145,6 +144,10 @@ class SGMLParser(markupbase.ParserBase):
                         break
                     continue
                 if rawdata.startswith("<!--", i):
+                        # Strictly speaking, a comment is --.*--
+                        # within a declaration tag <!...>.
+                        # This should be removed,
+                        # and comments handled only in parse_declaration.
                     k = self.parse_comment(i)
                     if k < 0: break
                     i = k
@@ -202,19 +205,6 @@ class SGMLParser(markupbase.ParserBase):
         self.rawdata = rawdata[i:]
         # XXX if end: check for empty stack
 
-    # Internal -- parse comment, return length or -1 if not terminated
-    def parse_comment(self, i, report=1):
-        rawdata = self.rawdata
-        if rawdata[i:i+4] != '<!--':
-            self.error('unexpected call to parse_comment()')
-        match = commentclose.search(rawdata, i+4)
-        if not match:
-            return -1
-        if report:
-            j = match.start(0)
-            self.handle_comment(rawdata[i+4: j])
-        return match.end(0)
-
     # Extensions for the DOCTYPE scanner:
     _decl_otherchars = '='
 
@@ -471,6 +461,10 @@ class TestSGMLParser(SGMLParser):
         self.flush()
         print '*** unknown char ref: &#' + ref + ';'
 
+    def unknown_decl(self, data):
+        self.flush()
+        print '*** unknown decl: [' + data + ']'
+
     def close(self):
         SGMLParser.close(self)
         self.flush()