summary refs log tree commit diff stats
path: root/Doc/tools/sgmlconv
diff options
context:
space:
mode:
author Fred Drake <fdrake@acm.org> 2001-03-23 16:42:08 (GMT)
committer Fred Drake <fdrake@acm.org> 2001-03-23 16:42:08 (GMT)
commit f6c115ff2f56e44f437838b283b6669c75fc5970 (patch)
tree 5b7a4fc07fa661007d818fb75a8295ec50f7b266 /Doc/tools/sgmlconv
parent a4699a71b89fb3965f4c7b4aefb7ebf722494df2 (diff)
download cpython-f6c115ff2f56e44f437838b283b6669c75fc5970.zip
cpython-f6c115ff2f56e44f437838b283b6669c75fc5970.tar.gz
cpython-f6c115ff2f56e44f437838b283b6669c75fc5970.tar.bz2
Re-write to no longer depend on an old version of PyXML. This now
implements a SAX XMLReader interface instead of the old Builder interface used with PyDOM (now obsolete). This only depends on the standard library, not PyXML.
Diffstat (limited to 'Doc/tools/sgmlconv')
-rw-r--r-- Doc/tools/sgmlconv/esistools.py 319
1 files changed, 273 insertions, 46 deletions
diff --git a/Doc/tools/sgmlconv/esistools.py b/Doc/tools/sgmlconv/esistools.py
index b89476c..40c9121 100644
--- a/Doc/tools/sgmlconv/esistools.py
+++ b/Doc/tools/sgmlconv/esistools.py
@@ -3,26 +3,33 @@ __version__ = '$Revision$'
import re
import string
-import sys
-import xml.dom.core
-import xml.dom.esis_builder
+import xml.dom.pulldom
-_data_rx = re.compile(r"[^\\][^\\]*")
+import xml.sax
+import xml.sax.handler
+import xml.sax.xmlreader
+
+
+_data_match = re.compile(r"[^\\][^\\]*").match
def decode(s):
r = ''
while s:
- m = _data_rx.match(s)
+ m = _data_match(s)
if m:
r = r + m.group()
- s = s[len(m.group()):]
+ s = s[m.end():]
elif s[1] == "\\":
r = r + "\\"
s = s[2:]
elif s[1] == "n":
r = r + "\n"
s = s[2:]
+ elif s[1] == "%":
+ s = s[2:]
+ n, s = s.split(";", 1)
+ r = r + unichr(int(n))
else:
raise ValueError, "can't handle " + `s`
return r
@@ -35,49 +42,269 @@ _charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c
+_null_join = ''.join
def encode(s):
- return string.join(map(_charmap.get, s), '')
+ return _null_join(map(_charmap.get, s))
-class ExtendedEsisBuilder(xml.dom.esis_builder.EsisBuilder):
- def __init__(self, *args, **kw):
- self.__empties = {}
- self.__is_empty = 0
- apply(xml.dom.esis_builder.EsisBuilder.__init__, (self,) + args, kw)
- self.buildFragment()
+class ESISReader(xml.sax.xmlreader.XMLReader):
+ """SAX Reader which reads from an ESIS stream.
- def feed(self, data):
- for line in string.split(data, '\n'):
- if not line:
- break
- event = line[0]
- text = line[1:]
- if event == '(':
- element = self.document.createElement(text, self.attr_store)
- self.attr_store = {}
- self.push(element)
- if self.__is_empty:
- self.__empties[text] = text
- self.__is_empty = 0
- elif event == ')':
- self.pop()
- elif event == 'A':
- l = re.split(' ', text, 2)
- name = l[0]
- value = decode(l[2])
- self.attr_store[name] = value
- elif event == '-':
- text = self.document.createText(decode(text))
- self.push(text)
- elif event == 'C':
- return
- elif event == 'e':
- self.__is_empty = 1
- elif event == '&':
- eref = self.document.createEntityReference(text)
- self.push(eref)
- else:
- sys.stderr.write('Unknown event: %s\n' % line)
+ No verification of the document structure is performed by the
+ reader; a general verifier could be used as the target
+ ContentHandler instance.
+
+ """
+ _decl_handler = None
+ _lexical_handler = None
+
+ _public_id = None
+ _system_id = None
+
+ _buffer = ""
+ _is_empty = 0
+ _lineno = 0
+ _started = 0
+
+ def __init__(self, contentHandler=None, errorHandler=None):
+ xml.sax.xmlreader.XMLReader.__init__(self)
+ self._attrs = {}
+ self._attributes = Attributes(self._attrs)
+ self._locator = Locator()
+ self._empties = {}
+ if contentHandler:
+ self.setContentHandler(contentHandler)
+ if errorHandler:
+ self.setErrorHandler(errorHandler)
def get_empties(self):
- return self.__empties.keys()
+ return self._empties.keys()
+
+ #
+ # XMLReader interface
+ #
+
+ def parse(self, source):
+ raise RuntimeError
+ self._locator._public_id = source.getPublicId()
+ self._locator._system_id = source.getSystemId()
+ fp = source.getByteStream()
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+ lineno = 0
+ while 1:
+ token, data = self._get_token(fp)
+ if token is None:
+ break
+ lineno = lineno + 1
+ self._locator._lineno = lineno
+ self._handle_token(token, data)
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+
+ def feed(self, data):
+ if not self._started:
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+ self._started = 1
+ data = self._buffer + data
+ self._buffer = None
+ lines = data.split("\n")
+ if lines:
+ for line in lines[:-1]:
+ self._lineno = self._lineno + 1
+ self._locator._lineno = self._lineno
+ if not line:
+ e = xml.sax.SAXParseException(
+ "ESIS input line contains no token type mark",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+ else:
+ self._handle_token(line[0], line[1:])
+ self._buffer = lines[-1]
+ else:
+ self._buffer = ""
+
+ def close(self):
+ handler = self.getContentHandler()
+ if handler:
+ handler.endDocument()
+ self._buffer = ""
+
+ def _get_token(self, fp):
+ try:
+ line = fp.readline()
+ except IOError, e:
+ e = SAXException("I/O error reading input stream", e)
+ self.getErrorHandler().fatalError(e)
+ return
+ if not line:
+ return None, None
+ if line[-1] == "\n":
+ line = line[:-1]
+ if not line:
+ e = xml.sax.SAXParseException(
+ "ESIS input line contains no token type mark",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+ return
+ return line[0], line[1:]
+
+ def _handle_token(self, token, data):
+ handler = self.getContentHandler()
+ if token == '-':
+ if data and handler:
+ handler.characters(decode(data))
+ elif token == ')':
+ if handler:
+ handler.endElement(decode(data))
+ elif token == '(':
+ if self._is_empty:
+ self._empties[data] = 1
+ if handler:
+ handler.startElement(data, self._attributes)
+ self._attrs.clear()
+ self._is_empty = 0
+ elif token == 'A':
+ name, value = data.split(' ', 1)
+ if value != "IMPLIED":
+ type, value = value.split(' ', 1)
+ self._attrs[name] = (decode(value), type)
+ elif token == '&':
+ # entity reference in SAX?
+ pass
+ elif token == '?':
+ if handler:
+ if ' ' in data:
+ target, data = string.split(data, None, 1)
+ else:
+ target, data = data, ""
+ handler.processingInstruction(target, decode(data))
+ elif token == 'N':
+ handler = self.getDTDHandler()
+ if handler:
+ handler.notationDecl(data, self._public_id, self._system_id)
+ self._public_id = None
+ self._system_id = None
+ elif token == 'p':
+ self._public_id = decode(data)
+ elif token == 's':
+ self._system_id = decode(data)
+ elif token == 'e':
+ self._is_empty = 1
+ elif token == 'C':
+ pass
+ else:
+ e = SAXParseException("unknown ESIS token in event stream",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+
+ def setContentHandler(self, handler):
+ old = self.getContentHandler()
+ if old:
+ old.setDocumentLocator(None)
+ if handler:
+ handler.setDocumentLocator(self._locator)
+ xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)
+
+ def getProperty(self, property):
+ if property == xml.sax.handler.property_lexical_handler:
+ return self._lexical_handler
+
+ elif property == xml.sax.handler.property_declaration_handler:
+ return self._decl_handler
+
+ else:
+ raise xml.sax.SAXNotRecognizedException("unknown property %s"
+ % `property`)
+
+ def setProperty(self, property, value):
+ if property == xml.sax.handler.property_lexical_handler:
+ if self._lexical_handler:
+ self._lexical_handler.setDocumentLocator(None)
+ if value:
+ value.setDocumentLocator(self._locator)
+ self._lexical_handler = value
+
+ elif property == xml.sax.handler.property_declaration_handler:
+ if self._decl_handler:
+ self._decl_handler.setDocumentLocator(None)
+ if value:
+ value.setDocumentLocator(self._locator)
+ self._decl_handler = value
+
+ else:
+ raise xml.sax.SAXNotRecognizedException()
+
+ def getFeature(self, feature):
+ if feature == xml.sax.handler.feature_namespaces:
+ return 1
+ else:
+ return xml.sax.xmlreader.XMLReader.getFeature(self, feature)
+
+ def setFeature(self, feature, enabled):
+ if feature == xml.sax.handler.feature_namespaces:
+ pass
+ else:
+ xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
+
+
+class Attributes(xml.sax.xmlreader.AttributesImpl):
+ # self._attrs has the form {name: (value, type)}
+
+ def getType(self, name):
+ return self._attrs[name][1]
+
+ def getValue(self, name):
+ return self._attrs[name][0]
+
+ def getValueByQName(self, name):
+ return self._attrs[name][0]
+
+ def __getitem__(self, name):
+ return self._attrs[name][0]
+
+ def get(self, name, default=None):
+ if self._attrs.has_key(name):
+ return self._attrs[name][0]
+ return default
+
+ def items(self):
+ L = []
+ for name, (value, type) in self._attrs.items():
+ L.append((name, value))
+ return L
+
+ def values(self):
+ L = []
+ for value, type in self._attrs.values():
+ L.append(value)
+ return L
+
+
+class Locator(xml.sax.xmlreader.Locator):
+ _lineno = -1
+ _public_id = None
+ _system_id = None
+
+ def getLineNumber(self):
+ return self._lineno
+
+ def getPublicId(self):
+ return self._public_id
+
+ def getSystemId(self):
+ return self._system_id
+
+
+def parse(stream_or_string, parser=None):
+ if type(stream_or_string) in [type(""), type(u"")]:
+ stream = open(stream_or_string)
+ else:
+ stream = stream_or_string
+ if not parser:
+ parser = ESISReader()
+ return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)