# A parser for SGML, using the derived class as static DTD. # XXX This only supports those SGML features used by HTML. # XXX There should be a way to distinguish between PCDATA (parsed # character data -- the normal case), RCDATA (replaceable character # data -- only char and entity references and end tags are special) # and CDATA (character data -- only end tags are special). import regex import string # Regular expressions used for parsing incomplete = regex.compile( '') commentopen = regex.compile('