From c6730e1772f52b99feaffb95cdb7774905340d52 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Wed, 14 Dec 2005 06:20:35 +0000 Subject: move the xml package implementation to xmlcore, and adjust the tests to test that package, not the xmlcore/PyXML switcheroo fiasco in the xml module/package --- Lib/test/test_minidom.py | 92 +- Lib/test/test_sax.py | 26 +- Lib/test/test_xml_etree.py | 20 +- Lib/xml.py | 47 + Lib/xml/__init__.py | 47 - Lib/xml/dom/NodeFilter.py | 27 - Lib/xml/dom/__init__.py | 139 --- Lib/xml/dom/domreg.py | 99 -- Lib/xml/dom/expatbuilder.py | 983 ------------------ Lib/xml/dom/minicompat.py | 184 ---- Lib/xml/dom/minidom.py | 1938 ----------------------------------- Lib/xml/dom/pulldom.py | 351 ------- Lib/xml/dom/xmlbuilder.py | 388 ------- Lib/xml/etree/ElementInclude.py | 141 --- Lib/xml/etree/ElementPath.py | 196 ---- Lib/xml/etree/ElementTree.py | 1254 ----------------------- Lib/xml/etree/__init__.py | 30 - Lib/xml/parsers/__init__.py | 8 - Lib/xml/parsers/expat.py | 4 - Lib/xml/sax/__init__.py | 108 -- Lib/xml/sax/_exceptions.py | 131 --- Lib/xml/sax/expatreader.py | 414 -------- Lib/xml/sax/handler.py | 342 ------- Lib/xml/sax/saxutils.py | 297 ------ Lib/xml/sax/xmlreader.py | 381 ------- Lib/xmlcore/dom/NodeFilter.py | 27 + Lib/xmlcore/dom/__init__.py | 139 +++ Lib/xmlcore/dom/domreg.py | 99 ++ Lib/xmlcore/dom/expatbuilder.py | 983 ++++++++++++++++++ Lib/xmlcore/dom/minicompat.py | 184 ++++ Lib/xmlcore/dom/minidom.py | 1938 +++++++++++++++++++++++++++++++++++ Lib/xmlcore/dom/pulldom.py | 351 +++++++ Lib/xmlcore/dom/xmlbuilder.py | 388 +++++++ Lib/xmlcore/etree/ElementInclude.py | 141 +++ Lib/xmlcore/etree/ElementPath.py | 196 ++++ Lib/xmlcore/etree/ElementTree.py | 1254 +++++++++++++++++++++++ Lib/xmlcore/etree/__init__.py | 30 + Lib/xmlcore/parsers/__init__.py | 8 + Lib/xmlcore/parsers/expat.py | 4 + Lib/xmlcore/sax/__init__.py | 108 ++ Lib/xmlcore/sax/_exceptions.py | 131 +++ Lib/xmlcore/sax/expatreader.py | 414 ++++++++ Lib/xmlcore/sax/handler.py | 342 +++++++ Lib/xmlcore/sax/saxutils.py | 297 ++++++ Lib/xmlcore/sax/xmlreader.py | 381 +++++++ 45 files changed, 7532 insertions(+), 7530 deletions(-) create mode 100644 Lib/xml.py delete mode 100644 Lib/xml/__init__.py delete mode 100644 Lib/xml/dom/NodeFilter.py delete mode 100644 Lib/xml/dom/__init__.py delete mode 100644 Lib/xml/dom/domreg.py delete mode 100644 Lib/xml/dom/expatbuilder.py delete mode 100644 Lib/xml/dom/minicompat.py delete mode 100644 Lib/xml/dom/minidom.py delete mode 100644 Lib/xml/dom/pulldom.py delete mode 100644 Lib/xml/dom/xmlbuilder.py delete mode 100644 Lib/xml/etree/ElementInclude.py delete mode 100644 Lib/xml/etree/ElementPath.py delete mode 100644 Lib/xml/etree/ElementTree.py delete mode 100644 Lib/xml/etree/__init__.py delete mode 100644 Lib/xml/parsers/__init__.py delete mode 100644 Lib/xml/parsers/expat.py delete mode 100644 Lib/xml/sax/__init__.py delete mode 100644 Lib/xml/sax/_exceptions.py delete mode 100644 Lib/xml/sax/expatreader.py delete mode 100644 Lib/xml/sax/handler.py delete mode 100644 Lib/xml/sax/saxutils.py delete mode 100644 Lib/xml/sax/xmlreader.py create mode 100644 Lib/xmlcore/dom/NodeFilter.py create mode 100644 Lib/xmlcore/dom/__init__.py create mode 100644 Lib/xmlcore/dom/domreg.py create mode 100644 Lib/xmlcore/dom/expatbuilder.py create mode 100644 Lib/xmlcore/dom/minicompat.py create mode 100644 Lib/xmlcore/dom/minidom.py create mode 100644 Lib/xmlcore/dom/pulldom.py create mode 100644 Lib/xmlcore/dom/xmlbuilder.py create mode 100644 Lib/xmlcore/etree/ElementInclude.py create mode 100644 Lib/xmlcore/etree/ElementPath.py create mode 100644 Lib/xmlcore/etree/ElementTree.py create mode 100644 Lib/xmlcore/etree/__init__.py create mode 100644 Lib/xmlcore/parsers/__init__.py create mode 100644 Lib/xmlcore/parsers/expat.py create mode 100644 Lib/xmlcore/sax/__init__.py create mode 100644 Lib/xmlcore/sax/_exceptions.py create mode 100644 Lib/xmlcore/sax/expatreader.py create mode 100644 Lib/xmlcore/sax/handler.py create mode 100644 Lib/xmlcore/sax/saxutils.py create mode 100644 Lib/xmlcore/sax/xmlreader.py diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 68aac0f..2510a41 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1,4 +1,4 @@ -# test for xml.dom.minidom +# test for xmlcore.dom.minidom import os import sys @@ -7,12 +7,12 @@ import traceback from StringIO import StringIO from test.test_support import verbose -import xml.dom -import xml.dom.minidom -import xml.parsers.expat +import xmlcore.dom +import xmlcore.dom.minidom +import xmlcore.parsers.expat -from xml.dom.minidom import parse, Node, Document, parseString -from xml.dom.minidom import getDOMImplementation +from xmlcore.dom.minidom import parse, Node, Document, parseString +from xmlcore.dom.minidom import getDOMImplementation if __name__ == "__main__": @@ -138,29 +138,29 @@ def testLegalChildren(): text = dom.createTextNode('text') try: dom.appendChild(text) - except xml.dom.HierarchyRequestErr: pass + except xmlcore.dom.HierarchyRequestErr: pass else: print "dom.appendChild didn't raise HierarchyRequestErr" dom.appendChild(elem) try: dom.insertBefore(text, elem) - except xml.dom.HierarchyRequestErr: pass + except xmlcore.dom.HierarchyRequestErr: pass else: print "dom.appendChild didn't raise HierarchyRequestErr" try: dom.replaceChild(text, elem) - except xml.dom.HierarchyRequestErr: pass + except xmlcore.dom.HierarchyRequestErr: pass else: print "dom.appendChild didn't raise HierarchyRequestErr" nodemap = elem.attributes try: nodemap.setNamedItem(text) - except xml.dom.HierarchyRequestErr: pass + except xmlcore.dom.HierarchyRequestErr: pass else: print "NamedNodeMap.setNamedItem didn't raise HierarchyRequestErr" try: nodemap.setNamedItemNS(text) - except xml.dom.HierarchyRequestErr: pass + except xmlcore.dom.HierarchyRequestErr: pass else: print "NamedNodeMap.setNamedItemNS didn't raise HierarchyRequestErr" @@ -439,7 +439,7 @@ def testProcessingInstruction(): and pi.firstChild is None and pi.lastChild is None and pi.localName is None - and pi.namespaceURI == xml.dom.EMPTY_NAMESPACE) + and pi.namespaceURI == xmlcore.dom.EMPTY_NAMESPACE) def testProcessingInstructionRepr(): pass @@ -454,7 +454,7 @@ def testTooManyDocumentElements(): elem = doc.createElement("extra") try: doc.appendChild(elem) - except xml.dom.HierarchyRequestErr: + except xmlcore.dom.HierarchyRequestErr: pass else: print "Failed to catch expected exception when" \ @@ -491,7 +491,7 @@ def testRemoveNamedItem(): confirm(a1.isSameNode(a2)) try: attrs.removeNamedItem("a") - except xml.dom.NotFoundErr: + except xmlcore.dom.NotFoundErr: pass def testRemoveNamedItemNS(): @@ -503,7 +503,7 @@ def testRemoveNamedItemNS(): confirm(a1.isSameNode(a2)) try: attrs.removeNamedItemNS("http://xml.python.org/", "b") - except xml.dom.NotFoundErr: + except xmlcore.dom.NotFoundErr: pass def testAttrListValues(): pass @@ -682,7 +682,7 @@ def check_import_document(deep, testName): doc2 = parseString("") try: doc1.importNode(doc2, deep) - except xml.dom.NotSupportedErr: + except xmlcore.dom.NotSupportedErr: pass else: raise Exception(testName + @@ -705,12 +705,14 @@ def create_nonempty_doctype(): doctype = getDOMImplementation().createDocumentType("doc", None, None) doctype.entities._seq = [] doctype.notations._seq = [] - notation = xml.dom.minidom.Notation("my-notation", None, - "http://xml.python.org/notations/my") + notation = xmlcore.dom.minidom.Notation( + "my-notation", None, + "http://xml.python.org/notations/my") doctype.notations._seq.append(notation) - entity = xml.dom.minidom.Entity("my-entity", None, - "http://xml.python.org/entities/my", - "my-notation") + entity = xmlcore.dom.minidom.Entity( + "my-entity", None, + "http://xml.python.org/entities/my", + "my-notation") entity.version = "1.0" entity.encoding = "utf-8" entity.actualEncoding = "us-ascii" @@ -729,7 +731,7 @@ def testImportDocumentTypeShallow(): target = create_doc_without_doctype() try: imported = target.importNode(src.doctype, 0) - except xml.dom.NotSupportedErr: + except xmlcore.dom.NotSupportedErr: pass else: raise Exception( @@ -740,7 +742,7 @@ def testImportDocumentTypeDeep(): target = create_doc_without_doctype() try: imported = target.importNode(src.doctype, 1) - except xml.dom.NotSupportedErr: + except xmlcore.dom.NotSupportedErr: pass else: raise Exception( @@ -848,7 +850,7 @@ def testNodeListItem(): doc.unlink() def testSAX2DOM(): - from xml.dom import pulldom + from xmlcore.dom import pulldom sax2dom = pulldom.SAX2DOM() sax2dom.startDocument() @@ -938,11 +940,11 @@ def testRenameAttribute(): attr = elem.attributes['a'] # Simple renaming - attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "b") + attr = doc.renameNode(attr, xmlcore.dom.EMPTY_NAMESPACE, "b") confirm(attr.name == "b" and attr.nodeName == "b" and attr.localName is None - and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE + and attr.namespaceURI == xmlcore.dom.EMPTY_NAMESPACE and attr.prefix is None and attr.value == "v" and elem.getAttributeNode("a") is None @@ -987,11 +989,11 @@ def testRenameAttribute(): and attrmap[("http://xml.python.org/ns2", "d")].isSameNode(attr)) # Rename back to a simple non-NS node - attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "e") + attr = doc.renameNode(attr, xmlcore.dom.EMPTY_NAMESPACE, "e") confirm(attr.name == "e" and attr.nodeName == "e" and attr.localName is None - and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE + and attr.namespaceURI == xmlcore.dom.EMPTY_NAMESPACE and attr.prefix is None and attr.value == "v" and elem.getAttributeNode("a") is None @@ -1005,7 +1007,7 @@ def testRenameAttribute(): try: doc.renameNode(attr, "http://xml.python.org/ns", "xmlns") - except xml.dom.NamespaceErr: + except xmlcore.dom.NamespaceErr: pass else: print "expected NamespaceErr" @@ -1018,11 +1020,11 @@ def testRenameElement(): elem = doc.documentElement # Simple renaming - elem = doc.renameNode(elem, xml.dom.EMPTY_NAMESPACE, "a") + elem = doc.renameNode(elem, xmlcore.dom.EMPTY_NAMESPACE, "a") confirm(elem.tagName == "a" and elem.nodeName == "a" and elem.localName is None - and elem.namespaceURI == xml.dom.EMPTY_NAMESPACE + and elem.namespaceURI == xmlcore.dom.EMPTY_NAMESPACE and elem.prefix is None and elem.ownerDocument.isSameNode(doc)) @@ -1045,11 +1047,11 @@ def testRenameElement(): and elem.ownerDocument.isSameNode(doc)) # Rename back to a simple non-NS node - elem = doc.renameNode(elem, xml.dom.EMPTY_NAMESPACE, "d") + elem = doc.renameNode(elem, xmlcore.dom.EMPTY_NAMESPACE, "d") confirm(elem.tagName == "d" and elem.nodeName == "d" and elem.localName is None - and elem.namespaceURI == xml.dom.EMPTY_NAMESPACE + and elem.namespaceURI == xmlcore.dom.EMPTY_NAMESPACE and elem.prefix is None and elem.ownerDocument.isSameNode(doc)) @@ -1060,15 +1062,15 @@ def checkRenameNodeSharedConstraints(doc, node): # Make sure illegal NS usage is detected: try: doc.renameNode(node, "http://xml.python.org/ns", "xmlns:foo") - except xml.dom.NamespaceErr: + except xmlcore.dom.NamespaceErr: pass else: print "expected NamespaceErr" doc2 = parseString("") try: - doc2.renameNode(node, xml.dom.EMPTY_NAMESPACE, "foo") - except xml.dom.WrongDocumentErr: + doc2.renameNode(node, xmlcore.dom.EMPTY_NAMESPACE, "foo") + except xmlcore.dom.WrongDocumentErr: pass else: print "expected WrongDocumentErr" @@ -1076,12 +1078,12 @@ def checkRenameNodeSharedConstraints(doc, node): def testRenameOther(): # We have to create a comment node explicitly since not all DOM # builders used with minidom add comments to the DOM. - doc = xml.dom.minidom.getDOMImplementation().createDocument( - xml.dom.EMPTY_NAMESPACE, "e", None) + doc = xmlcore.dom.minidom.getDOMImplementation().createDocument( + xmlcore.dom.EMPTY_NAMESPACE, "e", None) node = doc.createComment("comment") try: - doc.renameNode(node, xml.dom.EMPTY_NAMESPACE, "foo") - except xml.dom.NotSupportedErr: + doc.renameNode(node, xmlcore.dom.EMPTY_NAMESPACE, "foo") + except xmlcore.dom.NotSupportedErr: pass else: print "expected NotSupportedErr when renaming comment node" @@ -1192,13 +1194,13 @@ def testSchemaType(): # since each supports a different level of DTD information. t = elem.schemaType confirm(t.name is None - and t.namespace == xml.dom.EMPTY_NAMESPACE) + and t.namespace == xmlcore.dom.EMPTY_NAMESPACE) names = "id notid text enum ref refs ent ents nm nms".split() for name in names: a = elem.getAttributeNode(name) t = a.schemaType confirm(hasattr(t, "name") - and t.namespace == xml.dom.EMPTY_NAMESPACE) + and t.namespace == xmlcore.dom.EMPTY_NAMESPACE) def testSetIdAttribute(): doc = parseString("") @@ -1227,7 +1229,7 @@ def testSetIdAttribute(): and a2.isId and not a3.isId) # renaming an attribute should not affect its ID-ness: - doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") + doc.renameNode(a2, xmlcore.dom.EMPTY_NAMESPACE, "an") confirm(e.isSameNode(doc.getElementById("w")) and a2.isId) @@ -1263,7 +1265,7 @@ def testSetIdAttributeNS(): confirm(not a3.isId) confirm(doc.getElementById("v") is None) # renaming an attribute should not affect its ID-ness: - doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") + doc.renameNode(a2, xmlcore.dom.EMPTY_NAMESPACE, "an") confirm(e.isSameNode(doc.getElementById("w")) and a2.isId) @@ -1299,7 +1301,7 @@ def testSetIdAttributeNode(): confirm(not a3.isId) confirm(doc.getElementById("v") is None) # renaming an attribute should not affect its ID-ness: - doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") + doc.renameNode(a2, xmlcore.dom.EMPTY_NAMESPACE, "an") confirm(e.isSameNode(doc.getElementById("w")) and a2.isId) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 210ca19..ded81fb 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1,17 +1,17 @@ # regression test for SAX 2.0 -*- coding: iso-8859-1 -*- # $Id$ -from xml.sax import make_parser, ContentHandler, \ - SAXException, SAXReaderNotAvailable, SAXParseException +from xmlcore.sax import make_parser, ContentHandler, \ + SAXException, SAXReaderNotAvailable, SAXParseException try: make_parser() except SAXReaderNotAvailable: # don't try to test this module if we cannot create a parser raise ImportError("no XML parsers available") -from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ - XMLFilterBase -from xml.sax.expatreader import create_parser -from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl +from xmlcore.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ + XMLFilterBase +from xmlcore.sax.expatreader import create_parser +from xmlcore.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from cStringIO import StringIO from test.test_support import verify, verbose, TestFailed, findfile import os @@ -36,17 +36,17 @@ def test_make_parser2(): # Creating parsers several times in a row should succeed. # Testing this because there have been failures of this kind # before. - from xml.sax import make_parser + from xmlcore.sax import make_parser p = make_parser() - from xml.sax import make_parser + from xmlcore.sax import make_parser p = make_parser() - from xml.sax import make_parser + from xmlcore.sax import make_parser p = make_parser() - from xml.sax import make_parser + from xmlcore.sax import make_parser p = make_parser() - from xml.sax import make_parser + from xmlcore.sax import make_parser p = make_parser() - from xml.sax import make_parser + from xmlcore.sax import make_parser p = make_parser() except: return 0 @@ -108,7 +108,7 @@ def test_make_parser(): try: # Creating a parser should succeed - it should fall back # to the expatreader - p = make_parser(['xml.parsers.no_such_parser']) + p = make_parser(['xmlcore.parsers.no_such_parser']) except: return 0 else: diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 1ceb5f1..94f6b31 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1,4 +1,4 @@ -# xml.etree test. This file contains enough tests to make sure that +# xmlcore.etree test. This file contains enough tests to make sure that # all included components work as they should. For a more extensive # test suite, see the selftest script in the ElementTree distribution. @@ -30,9 +30,9 @@ def sanity(): """ Import sanity. - >>> from xml.etree import ElementTree - >>> from xml.etree import ElementInclude - >>> from xml.etree import ElementPath + >>> from xmlcore.etree import ElementTree + >>> from xmlcore.etree import ElementInclude + >>> from xmlcore.etree import ElementPath """ def check_method(method): @@ -59,7 +59,7 @@ def interface(): """ Test element tree interface. - >>> from xml.etree import ElementTree as ET + >>> from xmlcore.etree import ElementTree as ET >>> element = ET.Element("tag", key="value") >>> tree = ET.ElementTree(element) @@ -108,7 +108,7 @@ def find(): """ Test find methods (including xpath syntax). - >>> from xml.etree import ElementTree as ET + >>> from xmlcore.etree import ElementTree as ET >>> elem = ET.XML(SAMPLE_XML) >>> elem.find("tag").tag @@ -176,7 +176,7 @@ def find(): def parseliteral(): r""" - >>> from xml.etree import ElementTree as ET + >>> from xmlcore.etree import ElementTree as ET >>> element = ET.XML("text") >>> ET.ElementTree(element).write(sys.stdout) @@ -273,7 +273,7 @@ def xinclude_loader(href, parse="xml", encoding=None): except KeyError: raise IOError("resource not found") if parse == "xml": - from xml.etree.ElementTree import XML + from xmlcore.etree.ElementTree import XML return XML(data) return data @@ -281,8 +281,8 @@ def xinclude(): r""" Basic inclusion example (XInclude C.1) - >>> from xml.etree import ElementTree as ET - >>> from xml.etree import ElementInclude + >>> from xmlcore.etree import ElementTree as ET + >>> from xmlcore.etree import ElementInclude >>> document = xinclude_loader("C1.xml") >>> ElementInclude.include(document, xinclude_loader) diff --git a/Lib/xml.py b/Lib/xml.py new file mode 100644 index 0000000..7393c66 --- /dev/null +++ b/Lib/xml.py @@ -0,0 +1,47 @@ +"""Core XML support for Python. + +This package contains four sub-packages: + +dom -- The W3C Document Object Model. This supports DOM Level 1 + + Namespaces. + +parsers -- Python wrappers for XML parsers (currently only supports Expat). + +sax -- The Simple API for XML, developed by XML-Dev, led by David + Megginson and ported to Python by Lars Marius Garshol. This + supports the SAX 2 API. + +etree -- The ElementTree XML library. This is a subset of the full + ElementTree XML release. + +""" + +import sys +import xmlcore + +__all__ = ["dom", "parsers", "sax", "etree"] + +# When being checked-out without options, this has the form +# "Revision: x.y " +# When exported using -kv, it is "x.y". +__version__ = "$Revision$".split()[-2:][0] + + +_MINIMUM_XMLPLUS_VERSION = (0, 8, 4) + +try: + import _xmlplus +except ImportError: + sys.modules[__name__] = xmlcore +else: + try: + v = _xmlplus.version_info + except AttributeError: + # _xmlplus is too old; ignore it + pass + else: + if v >= _MINIMUM_XMLPLUS_VERSION: + _xmlplus.__path__.extend(xmlcore.__path__) + sys.modules[__name__] = _xmlplus + else: + del v diff --git a/Lib/xml/__init__.py b/Lib/xml/__init__.py deleted file mode 100644 index fa5e8cd..0000000 --- a/Lib/xml/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Core XML support for Python. - -This package contains four sub-packages: - -dom -- The W3C Document Object Model. This supports DOM Level 1 + - Namespaces. - -parsers -- Python wrappers for XML parsers (currently only supports Expat). - -sax -- The Simple API for XML, developed by XML-Dev, led by David - Megginson and ported to Python by Lars Marius Garshol. This - supports the SAX 2 API. - -etree -- The ElementTree XML library. This is a subset of the full - ElementTree XML release. - -""" - - -__all__ = ["dom", "parsers", "sax", "etree"] - -# When being checked-out without options, this has the form -# "Revision: x.y " -# When exported using -kv, it is "x.y". -__version__ = "$Revision$".split()[-2:][0] - - -_MINIMUM_XMLPLUS_VERSION = (0, 8, 4) - - -try: - import _xmlplus -except ImportError: - pass -else: - try: - v = _xmlplus.version_info - except AttributeError: - # _xmlplus is too old; ignore it - pass - else: - if v >= _MINIMUM_XMLPLUS_VERSION: - import sys - _xmlplus.__path__.extend(__path__) - sys.modules[__name__] = _xmlplus - else: - del v diff --git a/Lib/xml/dom/NodeFilter.py b/Lib/xml/dom/NodeFilter.py deleted file mode 100644 index fc05245..0000000 --- a/Lib/xml/dom/NodeFilter.py +++ /dev/null @@ -1,27 +0,0 @@ -# This is the Python mapping for interface NodeFilter from -# DOM2-Traversal-Range. It contains only constants. - -class NodeFilter: - """ - This is the DOM2 NodeFilter interface. It contains only constants. - """ - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - - SHOW_ALL = 0xFFFFFFFFL - SHOW_ELEMENT = 0x00000001 - SHOW_ATTRIBUTE = 0x00000002 - SHOW_TEXT = 0x00000004 - SHOW_CDATA_SECTION = 0x00000008 - SHOW_ENTITY_REFERENCE = 0x00000010 - SHOW_ENTITY = 0x00000020 - SHOW_PROCESSING_INSTRUCTION = 0x00000040 - SHOW_COMMENT = 0x00000080 - SHOW_DOCUMENT = 0x00000100 - SHOW_DOCUMENT_TYPE = 0x00000200 - SHOW_DOCUMENT_FRAGMENT = 0x00000400 - SHOW_NOTATION = 0x00000800 - - def acceptNode(self, node): - raise NotImplementedError diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py deleted file mode 100644 index 6363d00..0000000 --- a/Lib/xml/dom/__init__.py +++ /dev/null @@ -1,139 +0,0 @@ -"""W3C Document Object Model implementation for Python. - -The Python mapping of the Document Object Model is documented in the -Python Library Reference in the section on the xml.dom package. - -This package contains the following modules: - -minidom -- A simple implementation of the Level 1 DOM with namespace - support added (based on the Level 2 specification) and other - minor Level 2 functionality. - -pulldom -- DOM builder supporting on-demand tree-building for selected - subtrees of the document. - -""" - - -class Node: - """Class giving the NodeType constants.""" - - # DOM implementations may use this as a base class for their own - # Node implementations. If they don't, the constants defined here - # should still be used as the canonical definitions as they match - # the values given in the W3C recommendation. Client code can - # safely refer to these values in all tests of Node.nodeType - # values. - - ELEMENT_NODE = 1 - ATTRIBUTE_NODE = 2 - TEXT_NODE = 3 - CDATA_SECTION_NODE = 4 - ENTITY_REFERENCE_NODE = 5 - ENTITY_NODE = 6 - PROCESSING_INSTRUCTION_NODE = 7 - COMMENT_NODE = 8 - DOCUMENT_NODE = 9 - DOCUMENT_TYPE_NODE = 10 - DOCUMENT_FRAGMENT_NODE = 11 - NOTATION_NODE = 12 - - -#ExceptionCode -INDEX_SIZE_ERR = 1 -DOMSTRING_SIZE_ERR = 2 -HIERARCHY_REQUEST_ERR = 3 -WRONG_DOCUMENT_ERR = 4 -INVALID_CHARACTER_ERR = 5 -NO_DATA_ALLOWED_ERR = 6 -NO_MODIFICATION_ALLOWED_ERR = 7 -NOT_FOUND_ERR = 8 -NOT_SUPPORTED_ERR = 9 -INUSE_ATTRIBUTE_ERR = 10 -INVALID_STATE_ERR = 11 -SYNTAX_ERR = 12 -INVALID_MODIFICATION_ERR = 13 -NAMESPACE_ERR = 14 -INVALID_ACCESS_ERR = 15 -VALIDATION_ERR = 16 - - -class DOMException(Exception): - """Abstract base class for DOM exceptions. - Exceptions with specific codes are specializations of this class.""" - - def __init__(self, *args, **kw): - if self.__class__ is DOMException: - raise RuntimeError( - "DOMException should not be instantiated directly") - Exception.__init__(self, *args, **kw) - - def _get_code(self): - return self.code - - -class IndexSizeErr(DOMException): - code = INDEX_SIZE_ERR - -class DomstringSizeErr(DOMException): - code = DOMSTRING_SIZE_ERR - -class HierarchyRequestErr(DOMException): - code = HIERARCHY_REQUEST_ERR - -class WrongDocumentErr(DOMException): - code = WRONG_DOCUMENT_ERR - -class InvalidCharacterErr(DOMException): - code = INVALID_CHARACTER_ERR - -class NoDataAllowedErr(DOMException): - code = NO_DATA_ALLOWED_ERR - -class NoModificationAllowedErr(DOMException): - code = NO_MODIFICATION_ALLOWED_ERR - -class NotFoundErr(DOMException): - code = NOT_FOUND_ERR - -class NotSupportedErr(DOMException): - code = NOT_SUPPORTED_ERR - -class InuseAttributeErr(DOMException): - code = INUSE_ATTRIBUTE_ERR - -class InvalidStateErr(DOMException): - code = INVALID_STATE_ERR - -class SyntaxErr(DOMException): - code = SYNTAX_ERR - -class InvalidModificationErr(DOMException): - code = INVALID_MODIFICATION_ERR - -class NamespaceErr(DOMException): - code = NAMESPACE_ERR - -class InvalidAccessErr(DOMException): - code = INVALID_ACCESS_ERR - -class ValidationErr(DOMException): - code = VALIDATION_ERR - -class UserDataHandler: - """Class giving the operation constants for UserDataHandler.handle().""" - - # Based on DOM Level 3 (WD 9 April 2002) - - NODE_CLONED = 1 - NODE_IMPORTED = 2 - NODE_DELETED = 3 - NODE_RENAMED = 4 - -XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" -XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" -XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" -EMPTY_NAMESPACE = None -EMPTY_PREFIX = None - -from domreg import getDOMImplementation,registerDOMImplementation diff --git a/Lib/xml/dom/domreg.py b/Lib/xml/dom/domreg.py deleted file mode 100644 index 684c436..0000000 --- a/Lib/xml/dom/domreg.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Registration facilities for DOM. This module should not be used -directly. Instead, the functions getDOMImplementation and -registerDOMImplementation should be imported from xml.dom.""" - -from xml.dom.minicompat import * # isinstance, StringTypes - -# This is a list of well-known implementations. Well-known names -# should be published by posting to xml-sig@python.org, and are -# subsequently recorded in this file. - -well_known_implementations = { - 'minidom':'xml.dom.minidom', - '4DOM': 'xml.dom.DOMImplementation', - } - -# DOM implementations not officially registered should register -# themselves with their - -registered = {} - -def registerDOMImplementation(name, factory): - """registerDOMImplementation(name, factory) - - Register the factory function with the name. The factory function - should return an object which implements the DOMImplementation - interface. The factory function can either return the same object, - or a new one (e.g. if that implementation supports some - customization).""" - - registered[name] = factory - -def _good_enough(dom, features): - "_good_enough(dom, features) -> Return 1 if the dom offers the features" - for f,v in features: - if not dom.hasFeature(f,v): - return 0 - return 1 - -def getDOMImplementation(name = None, features = ()): - """getDOMImplementation(name = None, features = ()) -> DOM implementation. - - Return a suitable DOM implementation. The name is either - well-known, the module name of a DOM implementation, or None. If - it is not None, imports the corresponding module and returns - DOMImplementation object if the import succeeds. - - If name is not given, consider the available implementations to - find one with the required feature set. If no implementation can - be found, raise an ImportError. The features list must be a sequence - of (feature, version) pairs which are passed to hasFeature.""" - - import os - creator = None - mod = well_known_implementations.get(name) - if mod: - mod = __import__(mod, {}, {}, ['getDOMImplementation']) - return mod.getDOMImplementation() - elif name: - return registered[name]() - elif os.environ.has_key("PYTHON_DOM"): - return getDOMImplementation(name = os.environ["PYTHON_DOM"]) - - # User did not specify a name, try implementations in arbitrary - # order, returning the one that has the required features - if isinstance(features, StringTypes): - features = _parse_feature_string(features) - for creator in registered.values(): - dom = creator() - if _good_enough(dom, features): - return dom - - for creator in well_known_implementations.keys(): - try: - dom = getDOMImplementation(name = creator) - except StandardError: # typically ImportError, or AttributeError - continue - if _good_enough(dom, features): - return dom - - raise ImportError,"no suitable DOM implementation found" - -def _parse_feature_string(s): - features = [] - parts = s.split() - i = 0 - length = len(parts) - while i < length: - feature = parts[i] - if feature[0] in "0123456789": - raise ValueError, "bad feature name: %r" % (feature,) - i = i + 1 - version = None - if i < length: - v = parts[i] - if v[0] in "0123456789": - i = i + 1 - version = v - features.append((feature, version)) - return tuple(features) diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py deleted file mode 100644 index 47d81fb..0000000 --- a/Lib/xml/dom/expatbuilder.py +++ /dev/null @@ -1,983 +0,0 @@ -"""Facility to use the Expat parser to load a minidom instance -from a string or file. - -This avoids all the overhead of SAX and pulldom to gain performance. -""" - -# Warning! -# -# This module is tightly bound to the implementation details of the -# minidom DOM and can't be used with other DOM implementations. This -# is due, in part, to a lack of appropriate methods in the DOM (there is -# no way to create Entity and Notation nodes via the DOM Level 2 -# interface), and for performance. The later is the cause of some fairly -# cryptic code. -# -# Performance hacks: -# -# - .character_data_handler() has an extra case in which continuing -# data is appended to an existing Text node; this can be a -# speedup since pyexpat can break up character data into multiple -# callbacks even though we set the buffer_text attribute on the -# parser. This also gives us the advantage that we don't need a -# separate normalization pass. -# -# - Determining that a node exists is done using an identity comparison -# with None rather than a truth test; this avoids searching for and -# calling any methods on the node object if it exists. (A rather -# nice speedup is achieved this way as well!) - -from xml.dom import xmlbuilder, minidom, Node -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE -from xml.parsers import expat -from xml.dom.minidom import _append_child, _set_attribute_node -from xml.dom.NodeFilter import NodeFilter - -from xml.dom.minicompat import * - -TEXT_NODE = Node.TEXT_NODE -CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE -DOCUMENT_NODE = Node.DOCUMENT_NODE - -FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT -FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT -FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP -FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT - -theDOMImplementation = minidom.getDOMImplementation() - -# Expat typename -> TypeInfo -_typeinfo_map = { - "CDATA": minidom.TypeInfo(None, "cdata"), - "ENUM": minidom.TypeInfo(None, "enumeration"), - "ENTITY": minidom.TypeInfo(None, "entity"), - "ENTITIES": minidom.TypeInfo(None, "entities"), - "ID": minidom.TypeInfo(None, "id"), - "IDREF": minidom.TypeInfo(None, "idref"), - "IDREFS": minidom.TypeInfo(None, "idrefs"), - "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), - "NMTOKENS": minidom.TypeInfo(None, "nmtokens"), - } - -class ElementInfo(NewStyle): - __slots__ = '_attr_info', '_model', 'tagName' - - def __init__(self, tagName, model=None): - self.tagName = tagName - self._attr_info = [] - self._model = model - - def __getstate__(self): - return self._attr_info, self._model, self.tagName - - def __setstate__(self, state): - self._attr_info, self._model, self.tagName = state - - def getAttributeType(self, aname): - for info in self._attr_info: - if info[1] == aname: - t = info[-2] - if t[0] == "(": - return _typeinfo_map["ENUM"] - else: - return _typeinfo_map[info[-2]] - return minidom._no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return minidom._no_type - - def isElementContent(self): - if self._model: - type = self._model[0] - return type not in (expat.model.XML_CTYPE_ANY, - expat.model.XML_CTYPE_MIXED) - else: - return False - - def isEmpty(self): - if self._model: - return self._model[0] == expat.model.XML_CTYPE_EMPTY - else: - return False - - def isId(self, aname): - for info in self._attr_info: - if info[1] == aname: - return info[-2] == "ID" - return False - - def isIdNS(self, euri, ename, auri, aname): - # not sure this is meaningful - return self.isId((auri, aname)) - -def _intern(builder, s): - return builder._intern_setdefault(s, s) - -def _parse_ns_name(builder, name): - assert ' ' in name - parts = name.split(' ') - intern = builder._intern_setdefault - if len(parts) == 3: - uri, localname, prefix = parts - prefix = intern(prefix, prefix) - qname = "%s:%s" % (prefix, localname) - qname = intern(qname, qname) - localname = intern(localname, localname) - else: - uri, localname = parts - prefix = EMPTY_PREFIX - qname = localname = intern(localname, localname) - return intern(uri, uri), localname, prefix, qname - - -class ExpatBuilder: - """Document builder that uses Expat to build a ParsedXML.DOM document - instance.""" - - def __init__(self, options=None): - if options is None: - options = xmlbuilder.Options() - self._options = options - if self._options.filter is not None: - self._filter = FilterVisibilityController(self._options.filter) - else: - self._filter = None - # This *really* doesn't do anything in this case, so - # override it with something fast & minimal. - self._finish_start_element = id - self._parser = None - self.reset() - - def createParser(self): - """Create a new parser object.""" - return expat.ParserCreate() - - def getParser(self): - """Return the parser object, creating a new one if needed.""" - if not self._parser: - self._parser = self.createParser() - self._intern_setdefault = self._parser.intern.setdefault - self._parser.buffer_text = True - self._parser.ordered_attributes = True - self._parser.specified_attributes = True - self.install(self._parser) - return self._parser - - def reset(self): - """Free all data structures used during DOM construction.""" - self.document = theDOMImplementation.createDocument( - EMPTY_NAMESPACE, None, None) - self.curNode = self.document - self._elem_info = self.document._elem_info - self._cdata = False - - def install(self, parser): - """Install the callbacks needed to build the DOM into the parser.""" - # This creates circular references! - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.first_element_handler - parser.EndElementHandler = self.end_element_handler - parser.ProcessingInstructionHandler = self.pi_handler - if self._options.entities: - parser.EntityDeclHandler = self.entity_decl_handler - parser.NotationDeclHandler = self.notation_decl_handler - if self._options.comments: - parser.CommentHandler = self.comment_handler - if self._options.cdata_sections: - parser.StartCdataSectionHandler = self.start_cdata_section_handler - parser.EndCdataSectionHandler = self.end_cdata_section_handler - parser.CharacterDataHandler = self.character_data_handler_cdata - else: - parser.CharacterDataHandler = self.character_data_handler - parser.ExternalEntityRefHandler = self.external_entity_ref_handler - parser.XmlDeclHandler = self.xml_decl_handler - parser.ElementDeclHandler = self.element_decl_handler - parser.AttlistDeclHandler = self.attlist_decl_handler - - def parseFile(self, file): - """Parse a document from a file object, returning the document - node.""" - parser = self.getParser() - first_buffer = True - try: - while 1: - buffer = file.read(16*1024) - if not buffer: - break - parser.Parse(buffer, 0) - if first_buffer and self.document.documentElement: - self._setup_subset(buffer) - first_buffer = False - parser.Parse("", True) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def parseString(self, string): - """Parse a document from a string, returning the document node.""" - parser = self.getParser() - try: - parser.Parse(string, True) - self._setup_subset(string) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def _setup_subset(self, buffer): - """Load the internal subset if there might be one.""" - if self.document.doctype: - extractor = InternalSubsetExtractor() - extractor.parseString(buffer) - subset = extractor.getSubset() - self.document.doctype.internalSubset = subset - - def start_doctype_decl_handler(self, doctypeName, systemId, publicId, - has_internal_subset): - doctype = self.document.implementation.createDocumentType( - doctypeName, publicId, systemId) - doctype.ownerDocument = self.document - self.document.childNodes.append(doctype) - self.document.doctype = doctype - if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: - self.document.doctype = None - del self.document.childNodes[-1] - doctype = None - self._parser.EntityDeclHandler = None - self._parser.NotationDeclHandler = None - if has_internal_subset: - if doctype is not None: - doctype.entities._seq = [] - doctype.notations._seq = [] - self._parser.CommentHandler = None - self._parser.ProcessingInstructionHandler = None - self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - - def end_doctype_decl_handler(self): - if self._options.comments: - self._parser.CommentHandler = self.comment_handler - self._parser.ProcessingInstructionHandler = self.pi_handler - if not (self._elem_info or self._filter): - self._finish_end_element = id - - def pi_handler(self, target, data): - node = self.document.createProcessingInstruction(target, data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def character_data_handler_cdata(self, data): - childNodes = self.curNode.childNodes - if self._cdata: - if ( self._cdata_continue - and childNodes[-1].nodeType == CDATA_SECTION_NODE): - childNodes[-1].appendData(data) - return - node = self.document.createCDATASection(data) - self._cdata_continue = True - elif childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - value = node.data + data - d = node.__dict__ - d['data'] = d['nodeValue'] = value - return - else: - node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = data - d['ownerDocument'] = self.document - _append_child(self.curNode, node) - - def character_data_handler(self, data): - childNodes = self.curNode.childNodes - if childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - return - node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - d['ownerDocument'] = self.document - _append_child(self.curNode, node) - - def entity_decl_handler(self, entityName, is_parameter_entity, value, - base, systemId, publicId, notationName): - if is_parameter_entity: - # we don't care about parameter entities for the DOM - return - if not self._options.entities: - return - node = self.document._create_entity(entityName, publicId, - systemId, notationName) - if value is not None: - # internal entity - # node *should* be readonly, but we'll cheat - child = self.document.createTextNode(value) - node.childNodes.append(child) - self.document.doctype.entities._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - del self.document.doctype.entities._seq[-1] - - def notation_decl_handler(self, notationName, base, systemId, publicId): - node = self.document._create_notation(notationName, publicId, systemId) - self.document.doctype.notations._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: - del self.document.doctype.notations._seq[-1] - - def comment_handler(self, data): - node = self.document.createComment(data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def start_cdata_section_handler(self): - self._cdata = True - self._cdata_continue = False - - def end_cdata_section_handler(self): - self._cdata = False - self._cdata_continue = False - - def external_entity_ref_handler(self, context, base, systemId, publicId): - return 1 - - def first_element_handler(self, name, attributes): - if self._filter is None and not self._elem_info: - self._finish_end_element = id - self.getParser().StartElementHandler = self.start_element_handler - self.start_element_handler(name, attributes) - - def start_element_handler(self, name, attributes): - node = self.document.createElement(name) - _append_child(self.curNode, node) - self.curNode = node - - if attributes: - for i in range(0, len(attributes), 2): - a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, - None, EMPTY_PREFIX) - value = attributes[i+1] - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['value'] = d['nodeValue'] = value - d['ownerDocument'] = self.document - _set_attribute_node(node, a) - - if node is not self.document.documentElement: - self._finish_start_element(node) - - def _finish_start_element(self, node): - if self._filter: - # To be general, we'd have to call isSameNode(), but this - # is sufficient for minidom: - if node is self.document.documentElement: - return - filt = self._filter.startContainer(node) - if filt == FILTER_REJECT: - # ignore this node & all descendents - Rejecter(self) - elif filt == FILTER_SKIP: - # ignore this node, but make it's children become - # children of the parent node - Skipper(self) - else: - return - self.curNode = node.parentNode - node.parentNode.removeChild(node) - node.unlink() - - # If this ever changes, Namespaces.end_element_handler() needs to - # be changed to match. - # - def end_element_handler(self, name): - curNode = self.curNode - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - def _finish_end_element(self, curNode): - info = self._elem_info.get(curNode.tagName) - if info: - self._handle_white_text_nodes(curNode, info) - if self._filter: - if curNode is self.document.documentElement: - return - if self._filter.acceptNode(curNode) == FILTER_REJECT: - self.curNode.removeChild(curNode) - curNode.unlink() - - def _handle_white_text_nodes(self, node, info): - if (self._options.whitespace_in_element_content - or not info.isElementContent()): - return - - # We have element type information and should remove ignorable - # whitespace; identify for text nodes which contain only - # whitespace. - L = [] - for child in node.childNodes: - if child.nodeType == TEXT_NODE and not child.data.strip(): - L.append(child) - - # Remove ignorable whitespace from the tree. - for child in L: - node.removeChild(child) - - def element_decl_handler(self, name, model): - info = self._elem_info.get(name) - if info is None: - self._elem_info[name] = ElementInfo(name, model) - else: - assert info._model is None - info._model = model - - def attlist_decl_handler(self, elem, name, type, default, required): - info = self._elem_info.get(elem) - if info is None: - info = ElementInfo(elem) - self._elem_info[elem] = info - info._attr_info.append( - [None, name, None, None, default, 0, type, required]) - - def xml_decl_handler(self, version, encoding, standalone): - self.document.version = version - self.document.encoding = encoding - # This is still a little ugly, thanks to the pyexpat API. ;-( - if standalone >= 0: - if standalone: - self.document.standalone = True - else: - self.document.standalone = False - - -# Don't include FILTER_INTERRUPT, since that's checked separately -# where allowed. -_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) - -class FilterVisibilityController(NewStyle): - """Wrapper around a DOMBuilderFilter which implements the checks - to make the whatToShow filter attribute work.""" - - __slots__ = 'filter', - - def __init__(self, filter): - self.filter = filter - - def startContainer(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.startContainer(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError, \ - "startContainer() returned illegal value: " + repr(val) - return val - else: - return FILTER_ACCEPT - - def acceptNode(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.acceptNode(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val == FILTER_SKIP: - # move all child nodes to the parent, and remove this node - parent = node.parentNode - for child in node.childNodes[:]: - parent.appendChild(child) - # node is handled by the caller - return FILTER_REJECT - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError, \ - "acceptNode() returned illegal value: " + repr(val) - return val - else: - return FILTER_ACCEPT - - _nodetype_mask = { - Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, - Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, - Node.TEXT_NODE: NodeFilter.SHOW_TEXT, - Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, - Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, - Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, - Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, - Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, - Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, - Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, - Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, - Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, - } - - -class FilterCrutch(NewStyle): - __slots__ = '_builder', '_level', '_old_start', '_old_end' - - def __init__(self, builder): - self._level = 0 - self._builder = builder - parser = builder._parser - self._old_start = parser.StartElementHandler - self._old_end = parser.EndElementHandler - parser.StartElementHandler = self.start_element_handler - parser.EndElementHandler = self.end_element_handler - -class Rejecter(FilterCrutch): - __slots__ = () - - def __init__(self, builder): - FilterCrutch.__init__(self, builder) - parser = builder._parser - for name in ("ProcessingInstructionHandler", - "CommentHandler", - "CharacterDataHandler", - "StartCdataSectionHandler", - "EndCdataSectionHandler", - "ExternalEntityRefHandler", - ): - setattr(parser, name, None) - - def start_element_handler(self, *args): - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # restore the old handlers - parser = self._builder._parser - self._builder.install(parser) - parser.StartElementHandler = self._old_start - parser.EndElementHandler = self._old_end - else: - self._level = self._level - 1 - -class Skipper(FilterCrutch): - __slots__ = () - - def start_element_handler(self, *args): - node = self._builder.curNode - self._old_start(*args) - if self._builder.curNode is not node: - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # We're popping back out of the node we're skipping, so we - # shouldn't need to do anything but reset the handlers. - self._builder._parser.StartElementHandler = self._old_start - self._builder._parser.EndElementHandler = self._old_end - self._builder = None - else: - self._level = self._level - 1 - self._old_end(*args) - - -# framework document used by the fragment builder. -# Takes a string for the doctype, subset string, and namespace attrs string. - -_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ - "http://xml.python.org/entities/fragment-builder/internal" - -_FRAGMENT_BUILDER_TEMPLATE = ( - '''\ - -%%s -]> -&fragment-builder-internal;''' - % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) - - -class FragmentBuilder(ExpatBuilder): - """Builder which constructs document fragments given XML source - text and a context node. - - The context node is expected to provide information about the - namespace declarations which are in scope at the start of the - fragment. - """ - - def __init__(self, context, options=None): - if context.nodeType == DOCUMENT_NODE: - self.originalDocument = context - self.context = context - else: - self.originalDocument = context.ownerDocument - self.context = context - ExpatBuilder.__init__(self, options) - - def reset(self): - ExpatBuilder.reset(self) - self.fragment = None - - def parseFile(self, file): - """Parse a document fragment from a file object, returning the - fragment node.""" - return self.parseString(file.read()) - - def parseString(self, string): - """Parse a document fragment from a string, returning the - fragment node.""" - self._source = string - parser = self.getParser() - doctype = self.originalDocument.doctype - ident = "" - if doctype: - subset = doctype.internalSubset or self._getDeclarations() - if doctype.publicId: - ident = ('PUBLIC "%s" "%s"' - % (doctype.publicId, doctype.systemId)) - elif doctype.systemId: - ident = 'SYSTEM "%s"' % doctype.systemId - else: - subset = "" - nsattrs = self._getNSattrs() # get ns decls from node's ancestors - document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) - try: - parser.Parse(document, 1) - except: - self.reset() - raise - fragment = self.fragment - self.reset() -## self._parser = None - return fragment - - def _getDeclarations(self): - """Re-create the internal subset from the DocumentType node. - - This is only needed if we don't already have the - internalSubset as a string. - """ - doctype = self.context.ownerDocument.doctype - s = "" - if doctype: - for i in range(doctype.notations.length): - notation = doctype.notations.item(i) - if s: - s = s + "\n " - s = "%s' \ - % (s, notation.publicId, notation.systemId) - else: - s = '%s SYSTEM "%s">' % (s, notation.systemId) - for i in range(doctype.entities.length): - entity = doctype.entities.item(i) - if s: - s = s + "\n " - s = "%s" - return s - - def _getNSattrs(self): - return "" - - def external_entity_ref_handler(self, context, base, systemId, publicId): - if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: - # this entref is the one that we made to put the subtree - # in; all of our given input is parsed in here. - old_document = self.document - old_cur_node = self.curNode - parser = self._parser.ExternalEntityParserCreate(context) - # put the real document back, parse into the fragment to return - self.document = self.originalDocument - self.fragment = self.document.createDocumentFragment() - self.curNode = self.fragment - try: - parser.Parse(self._source, 1) - finally: - self.curNode = old_cur_node - self.document = old_document - self._source = None - return -1 - else: - return ExpatBuilder.external_entity_ref_handler( - self, context, base, systemId, publicId) - - -class Namespaces: - """Mix-in class for builders; adds support for namespaces.""" - - def _initNamespaces(self): - # list of (prefix, uri) ns declarations. Namespace attrs are - # constructed from this and added to the element's attrs. - self._ns_ordered_prefixes = [] - - def createParser(self): - """Create a new namespace-handling parser.""" - parser = expat.ParserCreate(namespace_separator=" ") - parser.namespace_prefixes = True - return parser - - def install(self, parser): - """Insert the namespace-handlers onto the parser.""" - ExpatBuilder.install(self, parser) - if self._options.namespace_declarations: - parser.StartNamespaceDeclHandler = ( - self.start_namespace_decl_handler) - - def start_namespace_decl_handler(self, prefix, uri): - """Push this namespace declaration on our storage.""" - self._ns_ordered_prefixes.append((prefix, uri)) - - def start_element_handler(self, name, attributes): - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - else: - uri = EMPTY_NAMESPACE - qname = name - localname = None - prefix = EMPTY_PREFIX - node = minidom.Element(qname, uri, prefix, localname) - node.ownerDocument = self.document - _append_child(self.curNode, node) - self.curNode = node - - if self._ns_ordered_prefixes: - for prefix, uri in self._ns_ordered_prefixes: - if prefix: - a = minidom.Attr(_intern(self, 'xmlns:' + prefix), - XMLNS_NAMESPACE, prefix, "xmlns") - else: - a = minidom.Attr("xmlns", XMLNS_NAMESPACE, - "xmlns", EMPTY_PREFIX) - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = uri - d = a.__dict__ - d['value'] = d['nodeValue'] = uri - d['ownerDocument'] = self.document - _set_attribute_node(node, a) - del self._ns_ordered_prefixes[:] - - if attributes: - _attrs = node._attrs - _attrsNS = node._attrsNS - for i in range(0, len(attributes), 2): - aname = attributes[i] - value = attributes[i+1] - if ' ' in aname: - uri, localname, prefix, qname = _parse_ns_name(self, aname) - a = minidom.Attr(qname, uri, localname, prefix) - _attrs[qname] = a - _attrsNS[(uri, localname)] = a - else: - a = minidom.Attr(aname, EMPTY_NAMESPACE, - aname, EMPTY_PREFIX) - _attrs[aname] = a - _attrsNS[(EMPTY_NAMESPACE, aname)] = a - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['ownerDocument'] = self.document - d['value'] = d['nodeValue'] = value - d['ownerElement'] = node - - if __debug__: - # This only adds some asserts to the original - # end_element_handler(), so we only define this when -O is not - # used. If changing one, be sure to check the other to see if - # it needs to be changed as well. - # - def end_element_handler(self, name): - curNode = self.curNode - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - assert (curNode.namespaceURI == uri - and curNode.localName == localname - and curNode.prefix == prefix), \ - "element stack messed up! (namespace)" - else: - assert curNode.nodeName == name, \ - "element stack messed up - bad nodeName" - assert curNode.namespaceURI == EMPTY_NAMESPACE, \ - "element stack messed up - bad namespaceURI" - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - -class ExpatBuilderNS(Namespaces, ExpatBuilder): - """Document builder that supports namespaces.""" - - def reset(self): - ExpatBuilder.reset(self) - self._initNamespaces() - - -class FragmentBuilderNS(Namespaces, FragmentBuilder): - """Fragment builder that supports namespaces.""" - - def reset(self): - FragmentBuilder.reset(self) - self._initNamespaces() - - def _getNSattrs(self): - """Return string of namespace attributes from this element and - ancestors.""" - # XXX This needs to be re-written to walk the ancestors of the - # context to build up the namespace information from - # declarations, elements, and attributes found in context. - # Otherwise we have to store a bunch more data on the DOM - # (though that *might* be more reliable -- not clear). - attrs = "" - context = self.context - L = [] - while context: - if hasattr(context, '_ns_prefix_uri'): - for prefix, uri in context._ns_prefix_uri.items(): - # add every new NS decl from context to L and attrs string - if prefix in L: - continue - L.append(prefix) - if prefix: - declname = "xmlns:" + prefix - else: - declname = "xmlns" - if attrs: - attrs = "%s\n %s='%s'" % (attrs, declname, uri) - else: - attrs = " %s='%s'" % (declname, uri) - context = context.parentNode - return attrs - - -class ParseEscape(Exception): - """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" - pass - -class InternalSubsetExtractor(ExpatBuilder): - """XML processor which can rip out the internal document type subset.""" - - subset = None - - def getSubset(self): - """Return the internal subset as a string.""" - return self.subset - - def parseFile(self, file): - try: - ExpatBuilder.parseFile(self, file) - except ParseEscape: - pass - - def parseString(self, string): - try: - ExpatBuilder.parseString(self, string) - except ParseEscape: - pass - - def install(self, parser): - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.start_element_handler - - def start_doctype_decl_handler(self, name, publicId, systemId, - has_internal_subset): - if has_internal_subset: - parser = self.getParser() - self.subset = [] - parser.DefaultHandler = self.subset.append - parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - else: - raise ParseEscape() - - def end_doctype_decl_handler(self): - s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') - self.subset = s - raise ParseEscape() - - def start_element_handler(self, name, attrs): - raise ParseEscape() - - -def parse(file, namespaces=1): - """Parse a document, returning the resulting Document node. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - - if isinstance(file, StringTypes): - fp = open(file, 'rb') - try: - result = builder.parseFile(fp) - finally: - fp.close() - else: - result = builder.parseFile(file) - return result - - -def parseString(string, namespaces=1): - """Parse a document from a string, returning the resulting - Document node. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - return builder.parseString(string) - - -def parseFragment(file, context, namespaces=1): - """Parse a fragment of a document, given the context from which it - was originally extracted. context should be the parent of the - node(s) which are in the fragment. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - - if isinstance(file, StringTypes): - fp = open(file, 'rb') - try: - result = builder.parseFile(fp) - finally: - fp.close() - else: - result = builder.parseFile(file) - return result - - -def parseFragmentString(string, context, namespaces=1): - """Parse a fragment of a document from a string, given the context - from which it was originally extracted. context should be the - parent of the node(s) which are in the fragment. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - return builder.parseString(string) - - -def makeBuilder(options): - """Create a builder based on an Options object.""" - if options.namespaces: - return ExpatBuilderNS(options) - else: - return ExpatBuilder(options) diff --git a/Lib/xml/dom/minicompat.py b/Lib/xml/dom/minicompat.py deleted file mode 100644 index 9f2f8f7..0000000 --- a/Lib/xml/dom/minicompat.py +++ /dev/null @@ -1,184 +0,0 @@ -"""Python version compatibility support for minidom.""" - -# This module should only be imported using "import *". -# -# The following names are defined: -# -# isinstance -- version of the isinstance() function that accepts -# tuples as the second parameter regardless of the -# Python version -# -# NodeList -- lightest possible NodeList implementation -# -# EmptyNodeList -- lightest possible NodeList that is guarateed to -# remain empty (immutable) -# -# StringTypes -- tuple of defined string types -# -# GetattrMagic -- base class used to make _get_ be magically -# invoked when available -# defproperty -- function used in conjunction with GetattrMagic; -# using these together is needed to make them work -# as efficiently as possible in both Python 2.2+ -# and older versions. For example: -# -# class MyClass(GetattrMagic): -# def _get_myattr(self): -# return something -# -# defproperty(MyClass, "myattr", -# "return some value") -# -# For Python 2.2 and newer, this will construct a -# property object on the class, which avoids -# needing to override __getattr__(). It will only -# work for read-only attributes. -# -# For older versions of Python, inheriting from -# GetattrMagic will use the traditional -# __getattr__() hackery to achieve the same effect, -# but less efficiently. -# -# defproperty() should be used for each version of -# the relevant _get_() function. -# -# NewStyle -- base class to cause __slots__ to be honored in -# the new world -# -# True, False -- only for Python 2.2 and earlier - -__all__ = ["NodeList", "EmptyNodeList", "NewStyle", - "StringTypes", "defproperty", "GetattrMagic"] - -import xml.dom - -try: - unicode -except NameError: - StringTypes = type(''), -else: - StringTypes = type(''), type(unicode('')) - - -# define True and False only if not defined as built-ins -try: - True -except NameError: - True = 1 - False = 0 - __all__.extend(["True", "False"]) - - -try: - isinstance('', StringTypes) -except TypeError: - # - # Wrap isinstance() to make it compatible with the version in - # Python 2.2 and newer. - # - _isinstance = isinstance - def isinstance(obj, type_or_seq): - try: - return _isinstance(obj, type_or_seq) - except TypeError: - for t in type_or_seq: - if _isinstance(obj, t): - return 1 - return 0 - __all__.append("isinstance") - - -if list is type([]): - class NodeList(list): - __slots__ = () - - def item(self, index): - if 0 <= index < len(self): - return self[index] - - def _get_length(self): - return len(self) - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - - def __getstate__(self): - return list(self) - - def __setstate__(self, state): - self[:] = state - - class EmptyNodeList(tuple): - __slots__ = () - - def __add__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def __radd__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def item(self, index): - return None - - def _get_length(self): - return 0 - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - -else: - def NodeList(): - return [] - - def EmptyNodeList(): - return [] - - -try: - property -except NameError: - def defproperty(klass, name, doc): - # taken care of by the base __getattr__() - pass - - class GetattrMagic: - def __getattr__(self, key): - if key.startswith("_"): - raise AttributeError, key - - try: - get = getattr(self, "_get_" + key) - except AttributeError: - raise AttributeError, key - return get() - - class NewStyle: - pass - -else: - def defproperty(klass, name, doc): - get = getattr(klass, ("_get_" + name)).im_func - def set(self, value, name=name): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute " + repr(name)) - assert not hasattr(klass, "_set_" + name), \ - "expected not to find _set_" + name - prop = property(get, set, doc=doc) - setattr(klass, name, prop) - - class GetattrMagic: - pass - - NewStyle = object diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py deleted file mode 100644 index 84be99b..0000000 --- a/Lib/xml/dom/minidom.py +++ /dev/null @@ -1,1938 +0,0 @@ -"""\ -minidom.py -- a lightweight DOM implementation. - -parse("foo.xml") - -parseString("") - -Todo: -===== - * convenience methods for getting elements and text. - * more testing - * bring some of the writer and linearizer code into conformance with this - interface - * SAX 2 namespaces -""" - -import xml.dom - -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg -from xml.dom.minicompat import * -from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS - -_TupleType = type(()) - -# This is used by the ID-cache invalidation checks; the list isn't -# actually complete, since the nodes being checked will never be the -# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is -# the node being added or removed, not the node being modified.) -# -_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, - xml.dom.Node.ENTITY_REFERENCE_NODE) - - -class Node(xml.dom.Node, GetattrMagic): - namespaceURI = None # this is non-null only for elements and attributes - parentNode = None - ownerDocument = None - nextSibling = None - previousSibling = None - - prefix = EMPTY_PREFIX # non-null only for NS elements and attributes - - def __nonzero__(self): - return True - - def toxml(self, encoding = None): - return self.toprettyxml("", "", encoding) - - def toprettyxml(self, indent="\t", newl="\n", encoding = None): - # indent = the indentation string to prepend, per level - # newl = the newline string to append - writer = _get_StringIO() - if encoding is not None: - import codecs - # Can't use codecs.getwriter to preserve 2.0 compatibility - writer = codecs.lookup(encoding)[3](writer) - if self.nodeType == Node.DOCUMENT_NODE: - # Can pass encoding only to document, to put it into XML header - self.writexml(writer, "", indent, newl, encoding) - else: - self.writexml(writer, "", indent, newl) - return writer.getvalue() - - def hasChildNodes(self): - if self.childNodes: - return True - else: - return False - - def _get_childNodes(self): - return self.childNodes - - def _get_firstChild(self): - if self.childNodes: - return self.childNodes[0] - - def _get_lastChild(self): - if self.childNodes: - return self.childNodes[-1] - - def insertBefore(self, newChild, refChild): - if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(newChild.childNodes): - self.insertBefore(c, refChild) - ### The DOM does not clearly specify what to return in this case - return newChild - if newChild.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(newChild), repr(self))) - if newChild.parentNode is not None: - newChild.parentNode.removeChild(newChild) - if refChild is None: - self.appendChild(newChild) - else: - try: - index = self.childNodes.index(refChild) - except ValueError: - raise xml.dom.NotFoundErr() - if newChild.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - self.childNodes.insert(index, newChild) - newChild.nextSibling = refChild - refChild.previousSibling = newChild - if index: - node = self.childNodes[index-1] - node.nextSibling = newChild - newChild.previousSibling = node - else: - newChild.previousSibling = None - newChild.parentNode = self - return newChild - - def appendChild(self, node): - if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(node.childNodes): - self.appendChild(c) - ### The DOM does not clearly specify what to return in this case - return node - if node.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - elif node.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - if node.parentNode is not None: - node.parentNode.removeChild(node) - _append_child(self, node) - node.nextSibling = None - return node - - def replaceChild(self, newChild, oldChild): - if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: - refChild = oldChild.nextSibling - self.removeChild(oldChild) - return self.insertBefore(newChild, refChild) - if newChild.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(newChild), repr(self))) - if newChild is oldChild: - return - if newChild.parentNode is not None: - newChild.parentNode.removeChild(newChild) - try: - index = self.childNodes.index(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - self.childNodes[index] = newChild - newChild.parentNode = self - oldChild.parentNode = None - if (newChild.nodeType in _nodeTypes_with_children - or oldChild.nodeType in _nodeTypes_with_children): - _clear_id_cache(self) - newChild.nextSibling = oldChild.nextSibling - newChild.previousSibling = oldChild.previousSibling - oldChild.nextSibling = None - oldChild.previousSibling = None - if newChild.previousSibling: - newChild.previousSibling.nextSibling = newChild - if newChild.nextSibling: - newChild.nextSibling.previousSibling = newChild - return oldChild - - def removeChild(self, oldChild): - try: - self.childNodes.remove(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - if oldChild.nextSibling is not None: - oldChild.nextSibling.previousSibling = oldChild.previousSibling - if oldChild.previousSibling is not None: - oldChild.previousSibling.nextSibling = oldChild.nextSibling - oldChild.nextSibling = oldChild.previousSibling = None - if oldChild.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - - oldChild.parentNode = None - return oldChild - - def normalize(self): - L = [] - for child in self.childNodes: - if child.nodeType == Node.TEXT_NODE: - data = child.data - if data and L and L[-1].nodeType == child.nodeType: - # collapse text node - node = L[-1] - node.data = node.data + child.data - node.nextSibling = child.nextSibling - child.unlink() - elif data: - if L: - L[-1].nextSibling = child - child.previousSibling = L[-1] - else: - child.previousSibling = None - L.append(child) - else: - # empty text node; discard - child.unlink() - else: - if L: - L[-1].nextSibling = child - child.previousSibling = L[-1] - else: - child.previousSibling = None - L.append(child) - if child.nodeType == Node.ELEMENT_NODE: - child.normalize() - self.childNodes[:] = L - - def cloneNode(self, deep): - return _clone_node(self, deep, self.ownerDocument or self) - - def isSupported(self, feature, version): - return self.ownerDocument.implementation.hasFeature(feature, version) - - def _get_localName(self): - # Overridden in Element and Attr where localName can be Non-Null - return None - - # Node interfaces from Level 3 (WD 9 April 2002) - - def isSameNode(self, other): - return self is other - - def getInterface(self, feature): - if self.isSupported(feature, None): - return self - else: - return None - - # The "user data" functions use a dictionary that is only present - # if some user data has been set, so be careful not to assume it - # exists. - - def getUserData(self, key): - try: - return self._user_data[key][0] - except (AttributeError, KeyError): - return None - - def setUserData(self, key, data, handler): - old = None - try: - d = self._user_data - except AttributeError: - d = {} - self._user_data = d - if d.has_key(key): - old = d[key][0] - if data is None: - # ignore handlers passed for None - handler = None - if old is not None: - del d[key] - else: - d[key] = (data, handler) - return old - - def _call_user_data_handler(self, operation, src, dst): - if hasattr(self, "_user_data"): - for key, (data, handler) in self._user_data.items(): - if handler is not None: - handler.handle(operation, key, data, src, dst) - - # minidom-specific API: - - def unlink(self): - self.parentNode = self.ownerDocument = None - if self.childNodes: - for child in self.childNodes: - child.unlink() - self.childNodes = NodeList() - self.previousSibling = None - self.nextSibling = None - -defproperty(Node, "firstChild", doc="First child node, or None.") -defproperty(Node, "lastChild", doc="Last child node, or None.") -defproperty(Node, "localName", doc="Namespace-local name of this node.") - - -def _append_child(self, node): - # fast path with less checks; usable by DOM builders if careful - childNodes = self.childNodes - if childNodes: - last = childNodes[-1] - node.__dict__["previousSibling"] = last - last.__dict__["nextSibling"] = node - childNodes.append(node) - node.__dict__["parentNode"] = self - -def _in_document(node): - # return True iff node is part of a document tree - while node is not None: - if node.nodeType == Node.DOCUMENT_NODE: - return True - node = node.parentNode - return False - -def _write_data(writer, data): - "Writes datachars to writer." - data = data.replace("&", "&").replace("<", "<") - data = data.replace("\"", """).replace(">", ">") - writer.write(data) - -def _get_elements_by_tagName_helper(parent, name, rc): - for node in parent.childNodes: - if node.nodeType == Node.ELEMENT_NODE and \ - (name == "*" or node.tagName == name): - rc.append(node) - _get_elements_by_tagName_helper(node, name, rc) - return rc - -def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): - for node in parent.childNodes: - if node.nodeType == Node.ELEMENT_NODE: - if ((localName == "*" or node.localName == localName) and - (nsURI == "*" or node.namespaceURI == nsURI)): - rc.append(node) - _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) - return rc - -class DocumentFragment(Node): - nodeType = Node.DOCUMENT_FRAGMENT_NODE - nodeName = "#document-fragment" - nodeValue = None - attributes = None - parentNode = None - _child_node_types = (Node.ELEMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.NOTATION_NODE) - - def __init__(self): - self.childNodes = NodeList() - - -class Attr(Node): - nodeType = Node.ATTRIBUTE_NODE - attributes = None - ownerElement = None - specified = False - _is_id = False - - _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) - - def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, - prefix=None): - # skip setattr for performance - d = self.__dict__ - d["nodeName"] = d["name"] = qName - d["namespaceURI"] = namespaceURI - d["prefix"] = prefix - d['childNodes'] = NodeList() - - # Add the single child node that represents the value of the attr - self.childNodes.append(Text()) - - # nodeValue and value are set elsewhere - - def _get_localName(self): - return self.nodeName.split(":", 1)[-1] - - def _get_name(self): - return self.name - - def _get_specified(self): - return self.specified - - def __setattr__(self, name, value): - d = self.__dict__ - if name in ("value", "nodeValue"): - d["value"] = d["nodeValue"] = value - d2 = self.childNodes[0].__dict__ - d2["data"] = d2["nodeValue"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - elif name in ("name", "nodeName"): - d["name"] = d["nodeName"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - else: - d[name] = value - - def _set_prefix(self, prefix): - nsuri = self.namespaceURI - if prefix == "xmlns": - if nsuri and nsuri != XMLNS_NAMESPACE: - raise xml.dom.NamespaceErr( - "illegal use of 'xmlns' prefix for the wrong namespace") - d = self.__dict__ - d['prefix'] = prefix - if prefix is None: - newName = self.localName - else: - newName = "%s:%s" % (prefix, self.localName) - if self.ownerElement: - _clear_id_cache(self.ownerElement) - d['nodeName'] = d['name'] = newName - - def _set_value(self, value): - d = self.__dict__ - d['value'] = d['nodeValue'] = value - if self.ownerElement: - _clear_id_cache(self.ownerElement) - self.childNodes[0].data = value - - def unlink(self): - # This implementation does not call the base implementation - # since most of that is not needed, and the expense of the - # method call is not warranted. We duplicate the removal of - # children, but that's all we needed from the base class. - elem = self.ownerElement - if elem is not None: - del elem._attrs[self.nodeName] - del elem._attrsNS[(self.namespaceURI, self.localName)] - if self._is_id: - self._is_id = False - elem._magic_id_nodes -= 1 - self.ownerDocument._magic_id_count -= 1 - for child in self.childNodes: - child.unlink() - del self.childNodes[:] - - def _get_isId(self): - if self._is_id: - return True - doc = self.ownerDocument - elem = self.ownerElement - if doc is None or elem is None: - return False - - info = doc._get_elem_info(elem) - if info is None: - return False - if self.namespaceURI: - return info.isIdNS(self.namespaceURI, self.localName) - else: - return info.isId(self.nodeName) - - def _get_schemaType(self): - doc = self.ownerDocument - elem = self.ownerElement - if doc is None or elem is None: - return _no_type - - info = doc._get_elem_info(elem) - if info is None: - return _no_type - if self.namespaceURI: - return info.getAttributeTypeNS(self.namespaceURI, self.localName) - else: - return info.getAttributeType(self.nodeName) - -defproperty(Attr, "isId", doc="True if this attribute is an ID.") -defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") -defproperty(Attr, "schemaType", doc="Schema type for this attribute.") - - -class NamedNodeMap(NewStyle, GetattrMagic): - """The attribute list is a transient interface to the underlying - dictionaries. Mutations here will change the underlying element's - dictionary. - - Ordering is imposed artificially and does not reflect the order of - attributes as found in an input document. - """ - - __slots__ = ('_attrs', '_attrsNS', '_ownerElement') - - def __init__(self, attrs, attrsNS, ownerElement): - self._attrs = attrs - self._attrsNS = attrsNS - self._ownerElement = ownerElement - - def _get_length(self): - return len(self._attrs) - - def item(self, index): - try: - return self[self._attrs.keys()[index]] - except IndexError: - return None - - def items(self): - L = [] - for node in self._attrs.values(): - L.append((node.nodeName, node.value)) - return L - - def itemsNS(self): - L = [] - for node in self._attrs.values(): - L.append(((node.namespaceURI, node.localName), node.value)) - return L - - def has_key(self, key): - if isinstance(key, StringTypes): - return self._attrs.has_key(key) - else: - return self._attrsNS.has_key(key) - - def keys(self): - return self._attrs.keys() - - def keysNS(self): - return self._attrsNS.keys() - - def values(self): - return self._attrs.values() - - def get(self, name, value=None): - return self._attrs.get(name, value) - - __len__ = _get_length - - def __cmp__(self, other): - if self._attrs is getattr(other, "_attrs", None): - return 0 - else: - return cmp(id(self), id(other)) - - def __getitem__(self, attname_or_tuple): - if isinstance(attname_or_tuple, _TupleType): - return self._attrsNS[attname_or_tuple] - else: - return self._attrs[attname_or_tuple] - - # same as set - def __setitem__(self, attname, value): - if isinstance(value, StringTypes): - try: - node = self._attrs[attname] - except KeyError: - node = Attr(attname) - node.ownerDocument = self._ownerElement.ownerDocument - self.setNamedItem(node) - node.value = value - else: - if not isinstance(value, Attr): - raise TypeError, "value must be a string or Attr object" - node = value - self.setNamedItem(node) - - def getNamedItem(self, name): - try: - return self._attrs[name] - except KeyError: - return None - - def getNamedItemNS(self, namespaceURI, localName): - try: - return self._attrsNS[(namespaceURI, localName)] - except KeyError: - return None - - def removeNamedItem(self, name): - n = self.getNamedItem(name) - if n is not None: - _clear_id_cache(self._ownerElement) - del self._attrs[n.nodeName] - del self._attrsNS[(n.namespaceURI, n.localName)] - if n.__dict__.has_key('ownerElement'): - n.__dict__['ownerElement'] = None - return n - else: - raise xml.dom.NotFoundErr() - - def removeNamedItemNS(self, namespaceURI, localName): - n = self.getNamedItemNS(namespaceURI, localName) - if n is not None: - _clear_id_cache(self._ownerElement) - del self._attrsNS[(n.namespaceURI, n.localName)] - del self._attrs[n.nodeName] - if n.__dict__.has_key('ownerElement'): - n.__dict__['ownerElement'] = None - return n - else: - raise xml.dom.NotFoundErr() - - def setNamedItem(self, node): - if not isinstance(node, Attr): - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - old = self._attrs.get(node.name) - if old: - old.unlink() - self._attrs[node.name] = node - self._attrsNS[(node.namespaceURI, node.localName)] = node - node.ownerElement = self._ownerElement - _clear_id_cache(node.ownerElement) - return old - - def setNamedItemNS(self, node): - return self.setNamedItem(node) - - def __delitem__(self, attname_or_tuple): - node = self[attname_or_tuple] - _clear_id_cache(node.ownerElement) - node.unlink() - - def __getstate__(self): - return self._attrs, self._attrsNS, self._ownerElement - - def __setstate__(self, state): - self._attrs, self._attrsNS, self._ownerElement = state - -defproperty(NamedNodeMap, "length", - doc="Number of nodes in the NamedNodeMap.") - -AttributeList = NamedNodeMap - - -class TypeInfo(NewStyle): - __slots__ = 'namespace', 'name' - - def __init__(self, namespace, name): - self.namespace = namespace - self.name = name - - def __repr__(self): - if self.namespace: - return "" % (self.name, self.namespace) - else: - return "" % self.name - - def _get_name(self): - return self.name - - def _get_namespace(self): - return self.namespace - -_no_type = TypeInfo(None, None) - -class Element(Node): - nodeType = Node.ELEMENT_NODE - nodeValue = None - schemaType = _no_type - - _magic_id_nodes = 0 - - _child_node_types = (Node.ELEMENT_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE) - - def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, - localName=None): - self.tagName = self.nodeName = tagName - self.prefix = prefix - self.namespaceURI = namespaceURI - self.childNodes = NodeList() - - self._attrs = {} # attributes are double-indexed: - self._attrsNS = {} # tagName -> Attribute - # URI,localName -> Attribute - # in the future: consider lazy generation - # of attribute objects this is too tricky - # for now because of headaches with - # namespaces. - - def _get_localName(self): - return self.tagName.split(":", 1)[-1] - - def _get_tagName(self): - return self.tagName - - def unlink(self): - for attr in self._attrs.values(): - attr.unlink() - self._attrs = None - self._attrsNS = None - Node.unlink(self) - - def getAttribute(self, attname): - try: - return self._attrs[attname].value - except KeyError: - return "" - - def getAttributeNS(self, namespaceURI, localName): - try: - return self._attrsNS[(namespaceURI, localName)].value - except KeyError: - return "" - - def setAttribute(self, attname, value): - attr = self.getAttributeNode(attname) - if attr is None: - attr = Attr(attname) - # for performance - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) - elif value != attr.value: - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - if attr.isId: - _clear_id_cache(self) - - def setAttributeNS(self, namespaceURI, qualifiedName, value): - prefix, localname = _nssplit(qualifiedName) - attr = self.getAttributeNodeNS(namespaceURI, localname) - if attr is None: - # for performance - attr = Attr(qualifiedName, namespaceURI, localname, prefix) - d = attr.__dict__ - d["prefix"] = prefix - d["nodeName"] = qualifiedName - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) - else: - d = attr.__dict__ - if value != attr.value: - d["value"] = d["nodeValue"] = value - if attr.isId: - _clear_id_cache(self) - if attr.prefix != prefix: - d["prefix"] = prefix - d["nodeName"] = qualifiedName - - def getAttributeNode(self, attrname): - return self._attrs.get(attrname) - - def getAttributeNodeNS(self, namespaceURI, localName): - return self._attrsNS.get((namespaceURI, localName)) - - def setAttributeNode(self, attr): - if attr.ownerElement not in (None, self): - raise xml.dom.InuseAttributeErr("attribute node already owned") - old1 = self._attrs.get(attr.name, None) - if old1 is not None: - self.removeAttributeNode(old1) - old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) - if old2 is not None and old2 is not old1: - self.removeAttributeNode(old2) - _set_attribute_node(self, attr) - - if old1 is not attr: - # It might have already been part of this node, in which case - # it doesn't represent a change, and should not be returned. - return old1 - if old2 is not attr: - return old2 - - setAttributeNodeNS = setAttributeNode - - def removeAttribute(self, name): - try: - attr = self._attrs[name] - except KeyError: - raise xml.dom.NotFoundErr() - self.removeAttributeNode(attr) - - def removeAttributeNS(self, namespaceURI, localName): - try: - attr = self._attrsNS[(namespaceURI, localName)] - except KeyError: - raise xml.dom.NotFoundErr() - self.removeAttributeNode(attr) - - def removeAttributeNode(self, node): - if node is None: - raise xml.dom.NotFoundErr() - try: - self._attrs[node.name] - except KeyError: - raise xml.dom.NotFoundErr() - _clear_id_cache(self) - node.unlink() - # Restore this since the node is still useful and otherwise - # unlinked - node.ownerDocument = self.ownerDocument - - removeAttributeNodeNS = removeAttributeNode - - def hasAttribute(self, name): - return self._attrs.has_key(name) - - def hasAttributeNS(self, namespaceURI, localName): - return self._attrsNS.has_key((namespaceURI, localName)) - - def getElementsByTagName(self, name): - return _get_elements_by_tagName_helper(self, name, NodeList()) - - def getElementsByTagNameNS(self, namespaceURI, localName): - return _get_elements_by_tagName_ns_helper( - self, namespaceURI, localName, NodeList()) - - def __repr__(self): - return "" % (self.tagName, id(self)) - - def writexml(self, writer, indent="", addindent="", newl=""): - # indent = current indentation - # addindent = indentation to add to higher levels - # newl = newline string - writer.write(indent+"<" + self.tagName) - - attrs = self._get_attributes() - a_names = attrs.keys() - a_names.sort() - - for a_name in a_names: - writer.write(" %s=\"" % a_name) - _write_data(writer, attrs[a_name].value) - writer.write("\"") - if self.childNodes: - writer.write(">%s"%(newl)) - for node in self.childNodes: - node.writexml(writer,indent+addindent,addindent,newl) - writer.write("%s%s" % (indent,self.tagName,newl)) - else: - writer.write("/>%s"%(newl)) - - def _get_attributes(self): - return NamedNodeMap(self._attrs, self._attrsNS, self) - - def hasAttributes(self): - if self._attrs: - return True - else: - return False - - # DOM Level 3 attributes, based on the 22 Oct 2002 draft - - def setIdAttribute(self, name): - idAttr = self.getAttributeNode(name) - self.setIdAttributeNode(idAttr) - - def setIdAttributeNS(self, namespaceURI, localName): - idAttr = self.getAttributeNodeNS(namespaceURI, localName) - self.setIdAttributeNode(idAttr) - - def setIdAttributeNode(self, idAttr): - if idAttr is None or not self.isSameNode(idAttr.ownerElement): - raise xml.dom.NotFoundErr() - if _get_containing_entref(self) is not None: - raise xml.dom.NoModificationAllowedErr() - if not idAttr._is_id: - idAttr.__dict__['_is_id'] = True - self._magic_id_nodes += 1 - self.ownerDocument._magic_id_count += 1 - _clear_id_cache(self) - -defproperty(Element, "attributes", - doc="NamedNodeMap of attributes on the element.") -defproperty(Element, "localName", - doc="Namespace-local name of this element.") - - -def _set_attribute_node(element, attr): - _clear_id_cache(element) - element._attrs[attr.name] = attr - element._attrsNS[(attr.namespaceURI, attr.localName)] = attr - - # This creates a circular reference, but Element.unlink() - # breaks the cycle since the references to the attribute - # dictionaries are tossed. - attr.__dict__['ownerElement'] = element - - -class Childless: - """Mixin that makes childless-ness easy to implement and avoids - the complexity of the Node methods that deal with children. - """ - - attributes = None - childNodes = EmptyNodeList() - firstChild = None - lastChild = None - - def _get_firstChild(self): - return None - - def _get_lastChild(self): - return None - - def appendChild(self, node): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes cannot have children") - - def hasChildNodes(self): - return False - - def insertBefore(self, newChild, refChild): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes do not have children") - - def removeChild(self, oldChild): - raise xml.dom.NotFoundErr( - self.nodeName + " nodes do not have children") - - def replaceChild(self, newChild, oldChild): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes do not have children") - - -class ProcessingInstruction(Childless, Node): - nodeType = Node.PROCESSING_INSTRUCTION_NODE - - def __init__(self, target, data): - self.target = self.nodeName = target - self.data = self.nodeValue = data - - def _get_data(self): - return self.data - def _set_data(self, value): - d = self.__dict__ - d['data'] = d['nodeValue'] = value - - def _get_target(self): - return self.target - def _set_target(self, value): - d = self.__dict__ - d['target'] = d['nodeName'] = value - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - elif name == "target" or name == "nodeName": - self.__dict__['target'] = self.__dict__['nodeName'] = value - else: - self.__dict__[name] = value - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("%s%s" % (indent,self.target, self.data, newl)) - - -class CharacterData(Childless, Node): - def _get_length(self): - return len(self.data) - __len__ = _get_length - - def _get_data(self): - return self.__dict__['data'] - def _set_data(self, data): - d = self.__dict__ - d['data'] = d['nodeValue'] = data - - _get_nodeValue = _get_data - _set_nodeValue = _set_data - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - else: - self.__dict__[name] = value - - def __repr__(self): - data = self.data - if len(data) > 10: - dotdotdot = "..." - else: - dotdotdot = "" - return "" % ( - self.__class__.__name__, data[0:10], dotdotdot) - - def substringData(self, offset, count): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - return self.data[offset:offset+count] - - def appendData(self, arg): - self.data = self.data + arg - - def insertData(self, offset, arg): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if arg: - self.data = "%s%s%s" % ( - self.data[:offset], arg, self.data[offset:]) - - def deleteData(self, offset, count): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - if count: - self.data = self.data[:offset] + self.data[offset+count:] - - def replaceData(self, offset, count, arg): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - if count: - self.data = "%s%s%s" % ( - self.data[:offset], arg, self.data[offset+count:]) - -defproperty(CharacterData, "length", doc="Length of the string data.") - - -class Text(CharacterData): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, CharacterData is an old-style class - # __slots__ = () - - nodeType = Node.TEXT_NODE - nodeName = "#text" - attributes = None - - def splitText(self, offset): - if offset < 0 or offset > len(self.data): - raise xml.dom.IndexSizeErr("illegal offset value") - newText = self.__class__() - newText.data = self.data[offset:] - newText.ownerDocument = self.ownerDocument - next = self.nextSibling - if self.parentNode and self in self.parentNode.childNodes: - if next is None: - self.parentNode.appendChild(newText) - else: - self.parentNode.insertBefore(newText, next) - self.data = self.data[:offset] - return newText - - def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, "%s%s%s"%(indent, self.data, newl)) - - # DOM Level 3 (WD 9 April 2002) - - def _get_wholeText(self): - L = [self.data] - n = self.previousSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - L.insert(0, n.data) - n = n.previousSibling - else: - break - n = self.nextSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - L.append(n.data) - n = n.nextSibling - else: - break - return ''.join(L) - - def replaceWholeText(self, content): - # XXX This needs to be seriously changed if minidom ever - # supports EntityReference nodes. - parent = self.parentNode - n = self.previousSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - next = n.previousSibling - parent.removeChild(n) - n = next - else: - break - n = self.nextSibling - if not content: - parent.removeChild(self) - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - next = n.nextSibling - parent.removeChild(n) - n = next - else: - break - if content: - d = self.__dict__ - d['data'] = content - d['nodeValue'] = content - return self - else: - return None - - def _get_isWhitespaceInElementContent(self): - if self.data.strip(): - return False - elem = _get_containing_element(self) - if elem is None: - return False - info = self.ownerDocument._get_elem_info(elem) - if info is None: - return False - else: - return info.isElementContent() - -defproperty(Text, "isWhitespaceInElementContent", - doc="True iff this text node contains only whitespace" - " and is in element content.") -defproperty(Text, "wholeText", - doc="The text of all logically-adjacent text nodes.") - - -def _get_containing_element(node): - c = node.parentNode - while c is not None: - if c.nodeType == Node.ELEMENT_NODE: - return c - c = c.parentNode - return None - -def _get_containing_entref(node): - c = node.parentNode - while c is not None: - if c.nodeType == Node.ENTITY_REFERENCE_NODE: - return c - c = c.parentNode - return None - - -class Comment(Childless, CharacterData): - nodeType = Node.COMMENT_NODE - nodeName = "#comment" - - def __init__(self, data): - self.data = self.nodeValue = data - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("%s%s" % (indent, self.data, newl)) - - -class CDATASection(Text): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, Text is an old-style class - # __slots__ = () - - nodeType = Node.CDATA_SECTION_NODE - nodeName = "#cdata-section" - - def writexml(self, writer, indent="", addindent="", newl=""): - if self.data.find("]]>") >= 0: - raise ValueError("']]>' not allowed in a CDATA section") - writer.write("" % self.data) - - -class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic): - __slots__ = '_seq', - - def __init__(self, seq=()): - # seq should be a list or tuple - self._seq = seq - - def __len__(self): - return len(self._seq) - - def _get_length(self): - return len(self._seq) - - def getNamedItem(self, name): - for n in self._seq: - if n.nodeName == name: - return n - - def getNamedItemNS(self, namespaceURI, localName): - for n in self._seq: - if n.namespaceURI == namespaceURI and n.localName == localName: - return n - - def __getitem__(self, name_or_tuple): - if isinstance(name_or_tuple, _TupleType): - node = self.getNamedItemNS(*name_or_tuple) - else: - node = self.getNamedItem(name_or_tuple) - if node is None: - raise KeyError, name_or_tuple - return node - - def item(self, index): - if index < 0: - return None - try: - return self._seq[index] - except IndexError: - return None - - def removeNamedItem(self, name): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def removeNamedItemNS(self, namespaceURI, localName): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def setNamedItem(self, node): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def setNamedItemNS(self, node): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def __getstate__(self): - return [self._seq] - - def __setstate__(self, state): - self._seq = state[0] - -defproperty(ReadOnlySequentialNamedNodeMap, "length", - doc="Number of entries in the NamedNodeMap.") - - -class Identified: - """Mix-in class that supports the publicId and systemId attributes.""" - - # XXX this does not work, this is an old-style class - # __slots__ = 'publicId', 'systemId' - - def _identified_mixin_init(self, publicId, systemId): - self.publicId = publicId - self.systemId = systemId - - def _get_publicId(self): - return self.publicId - - def _get_systemId(self): - return self.systemId - -class DocumentType(Identified, Childless, Node): - nodeType = Node.DOCUMENT_TYPE_NODE - nodeValue = None - name = None - publicId = None - systemId = None - internalSubset = None - - def __init__(self, qualifiedName): - self.entities = ReadOnlySequentialNamedNodeMap() - self.notations = ReadOnlySequentialNamedNodeMap() - if qualifiedName: - prefix, localname = _nssplit(qualifiedName) - self.name = localname - self.nodeName = self.name - - def _get_internalSubset(self): - return self.internalSubset - - def cloneNode(self, deep): - if self.ownerDocument is None: - # it's ok - clone = DocumentType(None) - clone.name = self.name - clone.nodeName = self.name - operation = xml.dom.UserDataHandler.NODE_CLONED - if deep: - clone.entities._seq = [] - clone.notations._seq = [] - for n in self.notations._seq: - notation = Notation(n.nodeName, n.publicId, n.systemId) - clone.notations._seq.append(notation) - n._call_user_data_handler(operation, n, notation) - for e in self.entities._seq: - entity = Entity(e.nodeName, e.publicId, e.systemId, - e.notationName) - entity.actualEncoding = e.actualEncoding - entity.encoding = e.encoding - entity.version = e.version - clone.entities._seq.append(entity) - e._call_user_data_handler(operation, n, entity) - self._call_user_data_handler(operation, self, clone) - return clone - else: - return None - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write(""+newl) - -class Entity(Identified, Node): - attributes = None - nodeType = Node.ENTITY_NODE - nodeValue = None - - actualEncoding = None - encoding = None - version = None - - def __init__(self, name, publicId, systemId, notation): - self.nodeName = name - self.notationName = notation - self.childNodes = NodeList() - self._identified_mixin_init(publicId, systemId) - - def _get_actualEncoding(self): - return self.actualEncoding - - def _get_encoding(self): - return self.encoding - - def _get_version(self): - return self.version - - def appendChild(self, newChild): - raise xml.dom.HierarchyRequestErr( - "cannot append children to an entity node") - - def insertBefore(self, newChild, refChild): - raise xml.dom.HierarchyRequestErr( - "cannot insert children below an entity node") - - def removeChild(self, oldChild): - raise xml.dom.HierarchyRequestErr( - "cannot remove children from an entity node") - - def replaceChild(self, newChild, oldChild): - raise xml.dom.HierarchyRequestErr( - "cannot replace children of an entity node") - -class Notation(Identified, Childless, Node): - nodeType = Node.NOTATION_NODE - nodeValue = None - - def __init__(self, name, publicId, systemId): - self.nodeName = name - self._identified_mixin_init(publicId, systemId) - - -class DOMImplementation(DOMImplementationLS): - _features = [("core", "1.0"), - ("core", "2.0"), - ("core", "3.0"), - ("core", None), - ("xml", "1.0"), - ("xml", "2.0"), - ("xml", "3.0"), - ("xml", None), - ("ls-load", "3.0"), - ("ls-load", None), - ] - - def hasFeature(self, feature, version): - if version == "": - version = None - return (feature.lower(), version) in self._features - - def createDocument(self, namespaceURI, qualifiedName, doctype): - if doctype and doctype.parentNode is not None: - raise xml.dom.WrongDocumentErr( - "doctype object owned by another DOM tree") - doc = self._create_document() - - add_root_element = not (namespaceURI is None - and qualifiedName is None - and doctype is None) - - if not qualifiedName and add_root_element: - # The spec is unclear what to raise here; SyntaxErr - # would be the other obvious candidate. Since Xerces raises - # InvalidCharacterErr, and since SyntaxErr is not listed - # for createDocument, that seems to be the better choice. - # XXX: need to check for illegal characters here and in - # createElement. - - # DOM Level III clears this up when talking about the return value - # of this function. If namespaceURI, qName and DocType are - # Null the document is returned without a document element - # Otherwise if doctype or namespaceURI are not None - # Then we go back to the above problem - raise xml.dom.InvalidCharacterErr("Element with no name") - - if add_root_element: - prefix, localname = _nssplit(qualifiedName) - if prefix == "xml" \ - and namespaceURI != "http://www.w3.org/XML/1998/namespace": - raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") - if prefix and not namespaceURI: - raise xml.dom.NamespaceErr( - "illegal use of prefix without namespaces") - element = doc.createElementNS(namespaceURI, qualifiedName) - if doctype: - doc.appendChild(doctype) - doc.appendChild(element) - - if doctype: - doctype.parentNode = doctype.ownerDocument = doc - - doc.doctype = doctype - doc.implementation = self - return doc - - def createDocumentType(self, qualifiedName, publicId, systemId): - doctype = DocumentType(qualifiedName) - doctype.publicId = publicId - doctype.systemId = systemId - return doctype - - # DOM Level 3 (WD 9 April 2002) - - def getInterface(self, feature): - if self.hasFeature(feature, None): - return self - else: - return None - - # internal - def _create_document(self): - return Document() - -class ElementInfo(NewStyle): - """Object that represents content-model information for an element. - - This implementation is not expected to be used in practice; DOM - builders should provide implementations which do the right thing - using information available to it. - - """ - - __slots__ = 'tagName', - - def __init__(self, name): - self.tagName = name - - def getAttributeType(self, aname): - return _no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return _no_type - - def isElementContent(self): - return False - - def isEmpty(self): - """Returns true iff this element is declared to have an EMPTY - content model.""" - return False - - def isId(self, aname): - """Returns true iff the named attribte is a DTD-style ID.""" - return False - - def isIdNS(self, namespaceURI, localName): - """Returns true iff the identified attribute is a DTD-style ID.""" - return False - - def __getstate__(self): - return self.tagName - - def __setstate__(self, state): - self.tagName = state - -def _clear_id_cache(node): - if node.nodeType == Node.DOCUMENT_NODE: - node._id_cache.clear() - node._id_search_stack = None - elif _in_document(node): - node.ownerDocument._id_cache.clear() - node.ownerDocument._id_search_stack= None - -class Document(Node, DocumentLS): - _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) - - nodeType = Node.DOCUMENT_NODE - nodeName = "#document" - nodeValue = None - attributes = None - doctype = None - parentNode = None - previousSibling = nextSibling = None - - implementation = DOMImplementation() - - # Document attributes from Level 3 (WD 9 April 2002) - - actualEncoding = None - encoding = None - standalone = None - version = None - strictErrorChecking = False - errorHandler = None - documentURI = None - - _magic_id_count = 0 - - def __init__(self): - self.childNodes = NodeList() - # mapping of (namespaceURI, localName) -> ElementInfo - # and tagName -> ElementInfo - self._elem_info = {} - self._id_cache = {} - self._id_search_stack = None - - def _get_elem_info(self, element): - if element.namespaceURI: - key = element.namespaceURI, element.localName - else: - key = element.tagName - return self._elem_info.get(key) - - def _get_actualEncoding(self): - return self.actualEncoding - - def _get_doctype(self): - return self.doctype - - def _get_documentURI(self): - return self.documentURI - - def _get_encoding(self): - return self.encoding - - def _get_errorHandler(self): - return self.errorHandler - - def _get_standalone(self): - return self.standalone - - def _get_strictErrorChecking(self): - return self.strictErrorChecking - - def _get_version(self): - return self.version - - def appendChild(self, node): - if node.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - if node.parentNode is not None: - # This needs to be done before the next test since this - # may *be* the document element, in which case it should - # end up re-ordered to the end. - node.parentNode.removeChild(node) - - if node.nodeType == Node.ELEMENT_NODE \ - and self._get_documentElement(): - raise xml.dom.HierarchyRequestErr( - "two document elements disallowed") - return Node.appendChild(self, node) - - def removeChild(self, oldChild): - try: - self.childNodes.remove(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - oldChild.nextSibling = oldChild.previousSibling = None - oldChild.parentNode = None - if self.documentElement is oldChild: - self.documentElement = None - - return oldChild - - def _get_documentElement(self): - for node in self.childNodes: - if node.nodeType == Node.ELEMENT_NODE: - return node - - def unlink(self): - if self.doctype is not None: - self.doctype.unlink() - self.doctype = None - Node.unlink(self) - - def cloneNode(self, deep): - if not deep: - return None - clone = self.implementation.createDocument(None, None, None) - clone.encoding = self.encoding - clone.standalone = self.standalone - clone.version = self.version - for n in self.childNodes: - childclone = _clone_node(n, deep, clone) - assert childclone.ownerDocument.isSameNode(clone) - clone.childNodes.append(childclone) - if childclone.nodeType == Node.DOCUMENT_NODE: - assert clone.documentElement is None - elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: - assert clone.doctype is None - clone.doctype = childclone - childclone.parentNode = clone - self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, - self, clone) - return clone - - def createDocumentFragment(self): - d = DocumentFragment() - d.ownerDocument = self - return d - - def createElement(self, tagName): - e = Element(tagName) - e.ownerDocument = self - return e - - def createTextNode(self, data): - if not isinstance(data, StringTypes): - raise TypeError, "node contents must be a string" - t = Text() - t.data = data - t.ownerDocument = self - return t - - def createCDATASection(self, data): - if not isinstance(data, StringTypes): - raise TypeError, "node contents must be a string" - c = CDATASection() - c.data = data - c.ownerDocument = self - return c - - def createComment(self, data): - c = Comment(data) - c.ownerDocument = self - return c - - def createProcessingInstruction(self, target, data): - p = ProcessingInstruction(target, data) - p.ownerDocument = self - return p - - def createAttribute(self, qName): - a = Attr(qName) - a.ownerDocument = self - a.value = "" - return a - - def createElementNS(self, namespaceURI, qualifiedName): - prefix, localName = _nssplit(qualifiedName) - e = Element(qualifiedName, namespaceURI, prefix) - e.ownerDocument = self - return e - - def createAttributeNS(self, namespaceURI, qualifiedName): - prefix, localName = _nssplit(qualifiedName) - a = Attr(qualifiedName, namespaceURI, localName, prefix) - a.ownerDocument = self - a.value = "" - return a - - # A couple of implementation-specific helpers to create node types - # not supported by the W3C DOM specs: - - def _create_entity(self, name, publicId, systemId, notationName): - e = Entity(name, publicId, systemId, notationName) - e.ownerDocument = self - return e - - def _create_notation(self, name, publicId, systemId): - n = Notation(name, publicId, systemId) - n.ownerDocument = self - return n - - def getElementById(self, id): - if self._id_cache.has_key(id): - return self._id_cache[id] - if not (self._elem_info or self._magic_id_count): - return None - - stack = self._id_search_stack - if stack is None: - # we never searched before, or the cache has been cleared - stack = [self.documentElement] - self._id_search_stack = stack - elif not stack: - # Previous search was completed and cache is still valid; - # no matching node. - return None - - result = None - while stack: - node = stack.pop() - # add child elements to stack for continued searching - stack.extend([child for child in node.childNodes - if child.nodeType in _nodeTypes_with_children]) - # check this node - info = self._get_elem_info(node) - if info: - # We have to process all ID attributes before - # returning in order to get all the attributes set to - # be IDs using Element.setIdAttribute*(). - for attr in node.attributes.values(): - if attr.namespaceURI: - if info.isIdNS(attr.namespaceURI, attr.localName): - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif not node._magic_id_nodes: - break - elif info.isId(attr.name): - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif not node._magic_id_nodes: - break - elif attr._is_id: - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif node._magic_id_nodes == 1: - break - elif node._magic_id_nodes: - for attr in node.attributes.values(): - if attr._is_id: - self._id_cache[attr.value] = node - if attr.value == id: - result = node - if result is not None: - break - return result - - def getElementsByTagName(self, name): - return _get_elements_by_tagName_helper(self, name, NodeList()) - - def getElementsByTagNameNS(self, namespaceURI, localName): - return _get_elements_by_tagName_ns_helper( - self, namespaceURI, localName, NodeList()) - - def isSupported(self, feature, version): - return self.implementation.hasFeature(feature, version) - - def importNode(self, node, deep): - if node.nodeType == Node.DOCUMENT_NODE: - raise xml.dom.NotSupportedErr("cannot import document nodes") - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - raise xml.dom.NotSupportedErr("cannot import document type nodes") - return _clone_node(node, deep, self) - - def writexml(self, writer, indent="", addindent="", newl="", - encoding = None): - if encoding is None: - writer.write(''+newl) - else: - writer.write('%s' % (encoding, newl)) - for node in self.childNodes: - node.writexml(writer, indent, addindent, newl) - - # DOM Level 3 (WD 9 April 2002) - - def renameNode(self, n, namespaceURI, name): - if n.ownerDocument is not self: - raise xml.dom.WrongDocumentErr( - "cannot rename nodes from other documents;\n" - "expected %s,\nfound %s" % (self, n.ownerDocument)) - if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): - raise xml.dom.NotSupportedErr( - "renameNode() only applies to element and attribute nodes") - if namespaceURI != EMPTY_NAMESPACE: - if ':' in name: - prefix, localName = name.split(':', 1) - if ( prefix == "xmlns" - and namespaceURI != xml.dom.XMLNS_NAMESPACE): - raise xml.dom.NamespaceErr( - "illegal use of 'xmlns' prefix") - else: - if ( name == "xmlns" - and namespaceURI != xml.dom.XMLNS_NAMESPACE - and n.nodeType == Node.ATTRIBUTE_NODE): - raise xml.dom.NamespaceErr( - "illegal use of the 'xmlns' attribute") - prefix = None - localName = name - else: - prefix = None - localName = None - if n.nodeType == Node.ATTRIBUTE_NODE: - element = n.ownerElement - if element is not None: - is_id = n._is_id - element.removeAttributeNode(n) - else: - element = None - # avoid __setattr__ - d = n.__dict__ - d['prefix'] = prefix - d['localName'] = localName - d['namespaceURI'] = namespaceURI - d['nodeName'] = name - if n.nodeType == Node.ELEMENT_NODE: - d['tagName'] = name - else: - # attribute node - d['name'] = name - if element is not None: - element.setAttributeNode(n) - if is_id: - element.setIdAttributeNode(n) - # It's not clear from a semantic perspective whether we should - # call the user data handlers for the NODE_RENAMED event since - # we're re-using the existing node. The draft spec has been - # interpreted as meaning "no, don't call the handler unless a - # new node is created." - return n - -defproperty(Document, "documentElement", - doc="Top-level element of this document.") - - -def _clone_node(node, deep, newOwnerDocument): - """ - Clone a node and give it the new owner document. - Called by Node.cloneNode and Document.importNode - """ - if node.ownerDocument.isSameNode(newOwnerDocument): - operation = xml.dom.UserDataHandler.NODE_CLONED - else: - operation = xml.dom.UserDataHandler.NODE_IMPORTED - if node.nodeType == Node.ELEMENT_NODE: - clone = newOwnerDocument.createElementNS(node.namespaceURI, - node.nodeName) - for attr in node.attributes.values(): - clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) - a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) - a.specified = attr.specified - - if deep: - for child in node.childNodes: - c = _clone_node(child, deep, newOwnerDocument) - clone.appendChild(c) - - elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: - clone = newOwnerDocument.createDocumentFragment() - if deep: - for child in node.childNodes: - c = _clone_node(child, deep, newOwnerDocument) - clone.appendChild(c) - - elif node.nodeType == Node.TEXT_NODE: - clone = newOwnerDocument.createTextNode(node.data) - elif node.nodeType == Node.CDATA_SECTION_NODE: - clone = newOwnerDocument.createCDATASection(node.data) - elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: - clone = newOwnerDocument.createProcessingInstruction(node.target, - node.data) - elif node.nodeType == Node.COMMENT_NODE: - clone = newOwnerDocument.createComment(node.data) - elif node.nodeType == Node.ATTRIBUTE_NODE: - clone = newOwnerDocument.createAttributeNS(node.namespaceURI, - node.nodeName) - clone.specified = True - clone.value = node.value - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - assert node.ownerDocument is not newOwnerDocument - operation = xml.dom.UserDataHandler.NODE_IMPORTED - clone = newOwnerDocument.implementation.createDocumentType( - node.name, node.publicId, node.systemId) - clone.ownerDocument = newOwnerDocument - if deep: - clone.entities._seq = [] - clone.notations._seq = [] - for n in node.notations._seq: - notation = Notation(n.nodeName, n.publicId, n.systemId) - notation.ownerDocument = newOwnerDocument - clone.notations._seq.append(notation) - if hasattr(n, '_call_user_data_handler'): - n._call_user_data_handler(operation, n, notation) - for e in node.entities._seq: - entity = Entity(e.nodeName, e.publicId, e.systemId, - e.notationName) - entity.actualEncoding = e.actualEncoding - entity.encoding = e.encoding - entity.version = e.version - entity.ownerDocument = newOwnerDocument - clone.entities._seq.append(entity) - if hasattr(e, '_call_user_data_handler'): - e._call_user_data_handler(operation, n, entity) - else: - # Note the cloning of Document and DocumentType nodes is - # implemenetation specific. minidom handles those cases - # directly in the cloneNode() methods. - raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) - - # Check for _call_user_data_handler() since this could conceivably - # used with other DOM implementations (one of the FourThought - # DOMs, perhaps?). - if hasattr(node, '_call_user_data_handler'): - node._call_user_data_handler(operation, node, clone) - return clone - - -def _nssplit(qualifiedName): - fields = qualifiedName.split(':', 1) - if len(fields) == 2: - return fields - else: - return (None, fields[0]) - - -def _get_StringIO(): - # we can't use cStringIO since it doesn't support Unicode strings - from StringIO import StringIO - return StringIO() - -def _do_pulldom_parse(func, args, kwargs): - events = func(*args, **kwargs) - toktype, rootNode = events.getEvent() - events.expandNode(rootNode) - events.clear() - return rootNode - -def parse(file, parser=None, bufsize=None): - """Parse a file into a DOM by filename or file object.""" - if parser is None and not bufsize: - from xml.dom import expatbuilder - return expatbuilder.parse(file) - else: - from xml.dom import pulldom - return _do_pulldom_parse(pulldom.parse, (file,), - {'parser': parser, 'bufsize': bufsize}) - -def parseString(string, parser=None): - """Parse a file into a DOM from a string.""" - if parser is None: - from xml.dom import expatbuilder - return expatbuilder.parseString(string) - else: - from xml.dom import pulldom - return _do_pulldom_parse(pulldom.parseString, (string,), - {'parser': parser}) - -def getDOMImplementation(features=None): - if features: - if isinstance(features, StringTypes): - features = domreg._parse_feature_string(features) - for f, v in features: - if not Document.implementation.hasFeature(f, v): - return None - return Document.implementation diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py deleted file mode 100644 index 18f49b5..0000000 --- a/Lib/xml/dom/pulldom.py +++ /dev/null @@ -1,351 +0,0 @@ -import xml.sax -import xml.sax.handler -import types - -try: - _StringTypes = [types.StringType, types.UnicodeType] -except AttributeError: - _StringTypes = [types.StringType] - -START_ELEMENT = "START_ELEMENT" -END_ELEMENT = "END_ELEMENT" -COMMENT = "COMMENT" -START_DOCUMENT = "START_DOCUMENT" -END_DOCUMENT = "END_DOCUMENT" -PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" -IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" -CHARACTERS = "CHARACTERS" - -class PullDOM(xml.sax.ContentHandler): - _locator = None - document = None - - def __init__(self, documentFactory=None): - from xml.dom import XML_NAMESPACE - self.documentFactory = documentFactory - self.firstEvent = [None, None] - self.lastEvent = self.firstEvent - self.elementStack = [] - self.push = self.elementStack.append - try: - self.pop = self.elementStack.pop - except AttributeError: - # use class' pop instead - pass - self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self.pending_events = [] - - def pop(self): - result = self.elementStack[-1] - del self.elementStack[-1] - return result - - def setDocumentLocator(self, locator): - self._locator = locator - - def startPrefixMapping(self, prefix, uri): - if not hasattr(self, '_xmlns_attrs'): - self._xmlns_attrs = [] - self._xmlns_attrs.append((prefix or 'xmlns', uri)) - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix or None - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts.pop() - - def startElementNS(self, name, tagName , attrs): - # Retrieve xml namespace declaration attributes. - xmlns_uri = 'http://www.w3.org/2000/xmlns/' - xmlns_attrs = getattr(self, '_xmlns_attrs', None) - if xmlns_attrs is not None: - for aname, value in xmlns_attrs: - attrs._attrs[(xmlns_uri, aname)] = value - self._xmlns_attrs = [] - uri, localname = name - if uri: - # When using namespaces, the reader may or may not - # provide us with the original name. If not, create - # *a* valid tagName from the current context. - if tagName is None: - prefix = self._current_context[uri] - if prefix: - tagName = prefix + ":" + localname - else: - tagName = localname - if self.document: - node = self.document.createElementNS(uri, tagName) - else: - node = self.buildDocument(uri, tagName) - else: - # When the tagname is not prefixed, it just appears as - # localname - if self.document: - node = self.document.createElement(localname) - else: - node = self.buildDocument(None, localname) - - for aname,value in attrs.items(): - a_uri, a_localname = aname - if a_uri == xmlns_uri: - if a_localname == 'xmlns': - qname = a_localname - else: - qname = 'xmlns:' + a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - elif a_uri: - prefix = self._current_context[a_uri] - if prefix: - qname = prefix + ":" + a_localname - else: - qname = a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - else: - attr = self.document.createAttribute(a_localname) - node.setAttributeNode(attr) - attr.value = value - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElementNS(self, name, tagName): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def startElement(self, name, attrs): - if self.document: - node = self.document.createElement(name) - else: - node = self.buildDocument(None, name) - - for aname,value in attrs.items(): - attr = self.document.createAttribute(aname) - attr.value = value - node.setAttributeNode(attr) - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElement(self, name): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def comment(self, s): - if self.document: - node = self.document.createComment(s) - self.lastEvent[1] = [(COMMENT, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(COMMENT, s), None] - self.pending_events.append(event) - - def processingInstruction(self, target, data): - if self.document: - node = self.document.createProcessingInstruction(target, data) - self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(PROCESSING_INSTRUCTION, target, data), None] - self.pending_events.append(event) - - def ignorableWhitespace(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] - self.lastEvent = self.lastEvent[1] - - def characters(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(CHARACTERS, node), None] - self.lastEvent = self.lastEvent[1] - - def startDocument(self): - if self.documentFactory is None: - import xml.dom.minidom - self.documentFactory = xml.dom.minidom.Document.implementation - - def buildDocument(self, uri, tagname): - # Can't do that in startDocument, since we need the tagname - # XXX: obtain DocumentType - node = self.documentFactory.createDocument(uri, tagname, None) - self.document = node - self.lastEvent[1] = [(START_DOCUMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - # Put everything we have seen so far into the document - for e in self.pending_events: - if e[0][0] == PROCESSING_INSTRUCTION: - _,target,data = e[0] - n = self.document.createProcessingInstruction(target, data) - e[0] = (PROCESSING_INSTRUCTION, n) - elif e[0][0] == COMMENT: - n = self.document.createComment(e[0][1]) - e[0] = (COMMENT, n) - else: - raise AssertionError("Unknown pending event ",e[0][0]) - self.lastEvent[1] = e - self.lastEvent = e - self.pending_events = None - return node.firstChild - - def endDocument(self): - self.lastEvent[1] = [(END_DOCUMENT, self.document), None] - self.pop() - - def clear(self): - "clear(): Explicitly release parsing structures" - self.document = None - -class ErrorHandler: - def warning(self, exception): - print exception - def error(self, exception): - raise exception - def fatalError(self, exception): - raise exception - -class DOMEventStream: - def __init__(self, stream, parser, bufsize): - self.stream = stream - self.parser = parser - self.bufsize = bufsize - if not hasattr(self.parser, 'feed'): - self.getEvent = self._slurp - self.reset() - - def reset(self): - self.pulldom = PullDOM() - # This content handler relies on namespace support - self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) - self.parser.setContentHandler(self.pulldom) - - def __getitem__(self, pos): - rc = self.getEvent() - if rc: - return rc - raise IndexError - - def next(self): - rc = self.getEvent() - if rc: - return rc - raise StopIteration - - def __iter__(self): - return self - - def expandNode(self, node): - event = self.getEvent() - parents = [node] - while event: - token, cur_node = event - if cur_node is node: - return - if token != END_ELEMENT: - parents[-1].appendChild(cur_node) - if token == START_ELEMENT: - parents.append(cur_node) - elif token == END_ELEMENT: - del parents[-1] - event = self.getEvent() - - def getEvent(self): - # use IncrementalParser interface, so we get the desired - # pull effect - if not self.pulldom.firstEvent[1]: - self.pulldom.lastEvent = self.pulldom.firstEvent - while not self.pulldom.firstEvent[1]: - buf = self.stream.read(self.bufsize) - if not buf: - self.parser.close() - return None - self.parser.feed(buf) - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def _slurp(self): - """ Fallback replacement for getEvent() using the - standard SAX2 interface, which means we slurp the - SAX events into memory (no performance gain, but - we are compatible to all SAX parsers). - """ - self.parser.parse(self.stream) - self.getEvent = self._emit - return self._emit() - - def _emit(self): - """ Fallback replacement for getEvent() that emits - the events that _slurp() read previously. - """ - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def clear(self): - """clear(): Explicitly release parsing objects""" - self.pulldom.clear() - del self.pulldom - self.parser = None - self.stream = None - -class SAX2DOM(PullDOM): - - def startElementNS(self, name, tagName , attrs): - PullDOM.startElementNS(self, name, tagName, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def startElement(self, name, attrs): - PullDOM.startElement(self, name, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def processingInstruction(self, target, data): - PullDOM.processingInstruction(self, target, data) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def ignorableWhitespace(self, chars): - PullDOM.ignorableWhitespace(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def characters(self, chars): - PullDOM.characters(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - -default_bufsize = (2 ** 14) - 20 - -def parse(stream_or_string, parser=None, bufsize=None): - if bufsize is None: - bufsize = default_bufsize - if type(stream_or_string) in _StringTypes: - stream = open(stream_or_string) - else: - stream = stream_or_string - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(stream, parser, bufsize) - -def parseString(string, parser=None): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - - bufsize = len(string) - buf = StringIO(string) - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(buf, parser, bufsize) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py deleted file mode 100644 index 5c04412..0000000 --- a/Lib/xml/dom/xmlbuilder.py +++ /dev/null @@ -1,388 +0,0 @@ -"""Implementation of the DOM Level 3 'LS-Load' feature.""" - -import copy -import xml.dom - -from xml.dom.minicompat import * - -from xml.dom.NodeFilter import NodeFilter - - -__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] - - -class Options: - """Features object that has variables set for each DOMBuilder feature. - - The DOMBuilder class uses an instance of this class to pass settings to - the ExpatBuilder class. - """ - - # Note that the DOMBuilder class in LoadSave constrains which of these - # values can be set using the DOM Level 3 LoadSave feature. - - namespaces = 1 - namespace_declarations = True - validation = False - external_parameter_entities = True - external_general_entities = True - external_dtd_subset = True - validate_if_schema = False - validate = False - datatype_normalization = False - create_entity_ref_nodes = True - entities = True - whitespace_in_element_content = True - cdata_sections = True - comments = True - charset_overrides_xml_encoding = True - infoset = False - supported_mediatypes_only = False - - errorHandler = None - filter = None - - -class DOMBuilder: - entityResolver = None - errorHandler = None - filter = None - - ACTION_REPLACE = 1 - ACTION_APPEND_AS_CHILDREN = 2 - ACTION_INSERT_AFTER = 3 - ACTION_INSERT_BEFORE = 4 - - _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, - ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) - - def __init__(self): - self._options = Options() - - def _get_entityResolver(self): - return self.entityResolver - def _set_entityResolver(self, entityResolver): - self.entityResolver = entityResolver - - def _get_errorHandler(self): - return self.errorHandler - def _set_errorHandler(self, errorHandler): - self.errorHandler = errorHandler - - def _get_filter(self): - return self.filter - def _set_filter(self, filter): - self.filter = filter - - def setFeature(self, name, state): - if self.supportsFeature(name): - state = state and 1 or 0 - try: - settings = self._settings[(_name_xform(name), state)] - except KeyError: - raise xml.dom.NotSupportedErr( - "unsupported feature: %r" % (name,)) - else: - for name, value in settings: - setattr(self._options, name, value) - else: - raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) - - def supportsFeature(self, name): - return hasattr(self._options, _name_xform(name)) - - def canSetFeature(self, name, state): - key = (_name_xform(name), state and 1 or 0) - return self._settings.has_key(key) - - # This dictionary maps from (feature,value) to a list of - # (option,value) pairs that should be set on the Options object. - # If a (feature,value) setting is not in this dictionary, it is - # not supported by the DOMBuilder. - # - _settings = { - ("namespace_declarations", 0): [ - ("namespace_declarations", 0)], - ("namespace_declarations", 1): [ - ("namespace_declarations", 1)], - ("validation", 0): [ - ("validation", 0)], - ("external_general_entities", 0): [ - ("external_general_entities", 0)], - ("external_general_entities", 1): [ - ("external_general_entities", 1)], - ("external_parameter_entities", 0): [ - ("external_parameter_entities", 0)], - ("external_parameter_entities", 1): [ - ("external_parameter_entities", 1)], - ("validate_if_schema", 0): [ - ("validate_if_schema", 0)], - ("create_entity_ref_nodes", 0): [ - ("create_entity_ref_nodes", 0)], - ("create_entity_ref_nodes", 1): [ - ("create_entity_ref_nodes", 1)], - ("entities", 0): [ - ("create_entity_ref_nodes", 0), - ("entities", 0)], - ("entities", 1): [ - ("entities", 1)], - ("whitespace_in_element_content", 0): [ - ("whitespace_in_element_content", 0)], - ("whitespace_in_element_content", 1): [ - ("whitespace_in_element_content", 1)], - ("cdata_sections", 0): [ - ("cdata_sections", 0)], - ("cdata_sections", 1): [ - ("cdata_sections", 1)], - ("comments", 0): [ - ("comments", 0)], - ("comments", 1): [ - ("comments", 1)], - ("charset_overrides_xml_encoding", 0): [ - ("charset_overrides_xml_encoding", 0)], - ("charset_overrides_xml_encoding", 1): [ - ("charset_overrides_xml_encoding", 1)], - ("infoset", 0): [], - ("infoset", 1): [ - ("namespace_declarations", 0), - ("validate_if_schema", 0), - ("create_entity_ref_nodes", 0), - ("entities", 0), - ("cdata_sections", 0), - ("datatype_normalization", 1), - ("whitespace_in_element_content", 1), - ("comments", 1), - ("charset_overrides_xml_encoding", 1)], - ("supported_mediatypes_only", 0): [ - ("supported_mediatypes_only", 0)], - ("namespaces", 0): [ - ("namespaces", 0)], - ("namespaces", 1): [ - ("namespaces", 1)], - } - - def getFeature(self, name): - xname = _name_xform(name) - try: - return getattr(self._options, xname) - except AttributeError: - if name == "infoset": - options = self._options - return (options.datatype_normalization - and options.whitespace_in_element_content - and options.comments - and options.charset_overrides_xml_encoding - and not (options.namespace_declarations - or options.validate_if_schema - or options.create_entity_ref_nodes - or options.entities - or options.cdata_sections)) - raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) - - def parseURI(self, uri): - if self.entityResolver: - input = self.entityResolver.resolveEntity(None, uri) - else: - input = DOMEntityResolver().resolveEntity(None, uri) - return self.parse(input) - - def parse(self, input): - options = copy.copy(self._options) - options.filter = self.filter - options.errorHandler = self.errorHandler - fp = input.byteStream - if fp is None and options.systemId: - import urllib2 - fp = urllib2.urlopen(input.systemId) - return self._parse_bytestream(fp, options) - - def parseWithContext(self, input, cnode, action): - if action not in self._legal_actions: - raise ValueError("not a legal action") - raise NotImplementedError("Haven't written this yet...") - - def _parse_bytestream(self, stream, options): - import xml.dom.expatbuilder - builder = xml.dom.expatbuilder.makeBuilder(options) - return builder.parseFile(stream) - - -def _name_xform(name): - return name.lower().replace('-', '_') - - -class DOMEntityResolver(NewStyle): - __slots__ = '_opener', - - def resolveEntity(self, publicId, systemId): - assert systemId is not None - source = DOMInputSource() - source.publicId = publicId - source.systemId = systemId - source.byteStream = self._get_opener().open(systemId) - - # determine the encoding if the transport provided it - source.encoding = self._guess_media_encoding(source) - - # determine the base URI is we can - import posixpath, urlparse - parts = urlparse.urlparse(systemId) - scheme, netloc, path, params, query, fragment = parts - # XXX should we check the scheme here as well? - if path and not path.endswith("/"): - path = posixpath.dirname(path) + "/" - parts = scheme, netloc, path, params, query, fragment - source.baseURI = urlparse.urlunparse(parts) - - return source - - def _get_opener(self): - try: - return self._opener - except AttributeError: - self._opener = self._create_opener() - return self._opener - - def _create_opener(self): - import urllib2 - return urllib2.build_opener() - - def _guess_media_encoding(self, source): - info = source.byteStream.info() - if info.has_key("Content-Type"): - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() - - -class DOMInputSource(NewStyle): - __slots__ = ('byteStream', 'characterStream', 'stringData', - 'encoding', 'publicId', 'systemId', 'baseURI') - - def __init__(self): - self.byteStream = None - self.characterStream = None - self.stringData = None - self.encoding = None - self.publicId = None - self.systemId = None - self.baseURI = None - - def _get_byteStream(self): - return self.byteStream - def _set_byteStream(self, byteStream): - self.byteStream = byteStream - - def _get_characterStream(self): - return self.characterStream - def _set_characterStream(self, characterStream): - self.characterStream = characterStream - - def _get_stringData(self): - return self.stringData - def _set_stringData(self, data): - self.stringData = data - - def _get_encoding(self): - return self.encoding - def _set_encoding(self, encoding): - self.encoding = encoding - - def _get_publicId(self): - return self.publicId - def _set_publicId(self, publicId): - self.publicId = publicId - - def _get_systemId(self): - return self.systemId - def _set_systemId(self, systemId): - self.systemId = systemId - - def _get_baseURI(self): - return self.baseURI - def _set_baseURI(self, uri): - self.baseURI = uri - - -class DOMBuilderFilter: - """Element filter which can be used to tailor construction of - a DOM instance. - """ - - # There's really no need for this class; concrete implementations - # should just implement the endElement() and startElement() - # methods as appropriate. Using this makes it easy to only - # implement one of them. - - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - FILTER_INTERRUPT = 4 - - whatToShow = NodeFilter.SHOW_ALL - - def _get_whatToShow(self): - return self.whatToShow - - def acceptNode(self, element): - return self.FILTER_ACCEPT - - def startContainer(self, element): - return self.FILTER_ACCEPT - -del NodeFilter - - -class DocumentLS: - """Mixin to create documents that conform to the load/save spec.""" - - async = False - - def _get_async(self): - return False - def _set_async(self, async): - if async: - raise xml.dom.NotSupportedErr( - "asynchronous document loading is not supported") - - def abort(self): - # What does it mean to "clear" a document? Does the - # documentElement disappear? - raise NotImplementedError( - "haven't figured out what this means yet") - - def load(self, uri): - raise NotImplementedError("haven't written this yet") - - def loadXML(self, source): - raise NotImplementedError("haven't written this yet") - - def saveXML(self, snode): - if snode is None: - snode = self - elif snode.ownerDocument is not self: - raise xml.dom.WrongDocumentErr() - return snode.toxml() - - -class DOMImplementationLS: - MODE_SYNCHRONOUS = 1 - MODE_ASYNCHRONOUS = 2 - - def createDOMBuilder(self, mode, schemaType): - if schemaType is not None: - raise xml.dom.NotSupportedErr( - "schemaType not yet supported") - if mode == self.MODE_SYNCHRONOUS: - return DOMBuilder() - if mode == self.MODE_ASYNCHRONOUS: - raise xml.dom.NotSupportedErr( - "asynchronous builders are not supported") - raise ValueError("unknown value for mode") - - def createDOMWriter(self): - raise NotImplementedError( - "the writer interface hasn't been written yet!") - - def createDOMInputSource(self): - return DOMInputSource() diff --git a/Lib/xml/etree/ElementInclude.py b/Lib/xml/etree/ElementInclude.py deleted file mode 100644 index 5ee199a..0000000 --- a/Lib/xml/etree/ElementInclude.py +++ /dev/null @@ -1,141 +0,0 @@ -# -# ElementTree -# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ -# -# limited xinclude support for element trees -# -# history: -# 2003-08-15 fl created -# 2003-11-14 fl fixed default loader -# -# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -## -# Limited XInclude support for the ElementTree package. -## - -import copy -import ElementTree - -XINCLUDE = "{http://www.w3.org/2001/XInclude}" - -XINCLUDE_INCLUDE = XINCLUDE + "include" -XINCLUDE_FALLBACK = XINCLUDE + "fallback" - -## -# Fatal include error. - -class FatalIncludeError(SyntaxError): - pass - -## -# Default loader. This loader reads an included resource from disk. -# -# @param href Resource reference. -# @param parse Parse mode. Either "xml" or "text". -# @param encoding Optional text encoding. -# @return The expanded resource. If the parse mode is "xml", this -# is an ElementTree instance. If the parse mode is "text", this -# is a Unicode string. If the loader fails, it can return None -# or raise an IOError exception. -# @throws IOError If the loader fails to load the resource. - -def default_loader(href, parse, encoding=None): - file = open(href) - if parse == "xml": - data = ElementTree.parse(file).getroot() - else: - data = file.read() - if encoding: - data = data.decode(encoding) - file.close() - return data - -## -# Expand XInclude directives. -# -# @param elem Root element. -# @param loader Optional resource loader. If omitted, it defaults -# to {@link default_loader}. If given, it should be a callable -# that implements the same interface as default_loader. -# @throws FatalIncludeError If the function fails to include a given -# resource, or if the tree contains malformed XInclude elements. -# @throws IOError If the function fails to load a given resource. - -def include(elem, loader=None): - if loader is None: - loader = default_loader - # look for xinclude elements - i = 0 - while i < len(elem): - e = elem[i] - if e.tag == XINCLUDE_INCLUDE: - # process xinclude directive - href = e.get("href") - parse = e.get("parse", "xml") - if parse == "xml": - node = loader(href, parse) - if node is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - node = copy.copy(node) - if e.tail: - node.tail = (node.tail or "") + e.tail - elem[i] = node - elif parse == "text": - text = loader(href, parse, e.get("encoding")) - if text is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - if i: - node = elem[i-1] - node.tail = (node.tail or "") + text - else: - elem.text = (elem.text or "") + text + (e.tail or "") - del elem[i] - continue - else: - raise FatalIncludeError( - "unknown parse type in xi:include tag (%r)" % parse - ) - elif e.tag == XINCLUDE_FALLBACK: - raise FatalIncludeError( - "xi:fallback tag must be child of xi:include (%r)" % e.tag - ) - else: - include(e, loader) - i = i + 1 - diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py deleted file mode 100644 index e8093c6..0000000 --- a/Lib/xml/etree/ElementPath.py +++ /dev/null @@ -1,196 +0,0 @@ -# -# ElementTree -# $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $ -# -# limited xpath support for element trees -# -# history: -# 2003-05-23 fl created -# 2003-05-28 fl added support for // etc -# 2003-08-27 fl fixed parsing of periods in element names -# -# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -## -# Implementation module for XPath support. There's usually no reason -# to import this module directly; the ElementTree does this for -# you, if needed. -## - -import re - -xpath_tokenizer = re.compile( - "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" - ).findall - -class xpath_descendant_or_self: - pass - -## -# Wrapper for a compiled XPath. - -class Path: - - ## - # Create an Path instance from an XPath expression. - - def __init__(self, path): - tokens = xpath_tokenizer(path) - # the current version supports 'path/path'-style expressions only - self.path = [] - self.tag = None - if tokens and tokens[0][0] == "/": - raise SyntaxError("cannot use absolute path on element") - while tokens: - op, tag = tokens.pop(0) - if tag or op == "*": - self.path.append(tag or op) - elif op == ".": - pass - elif op == "/": - self.path.append(xpath_descendant_or_self()) - continue - else: - raise SyntaxError("unsupported path syntax (%s)" % op) - if tokens: - op, tag = tokens.pop(0) - if op != "/": - raise SyntaxError( - "expected path separator (%s)" % (op or tag) - ) - if self.path and isinstance(self.path[-1], xpath_descendant_or_self): - raise SyntaxError("path cannot end with //") - if len(self.path) == 1 and isinstance(self.path[0], type("")): - self.tag = self.path[0] - - ## - # Find first matching object. - - def find(self, element): - tag = self.tag - if tag is None: - nodeset = self.findall(element) - if not nodeset: - return None - return nodeset[0] - for elem in element: - if elem.tag == tag: - return elem - return None - - ## - # Find text for first matching object. - - def findtext(self, element, default=None): - tag = self.tag - if tag is None: - nodeset = self.findall(element) - if not nodeset: - return default - return nodeset[0].text or "" - for elem in element: - if elem.tag == tag: - return elem.text or "" - return default - - ## - # Find all matching objects. - - def findall(self, element): - nodeset = [element] - index = 0 - while 1: - try: - path = self.path[index] - index = index + 1 - except IndexError: - return nodeset - set = [] - if isinstance(path, xpath_descendant_or_self): - try: - tag = self.path[index] - if not isinstance(tag, type("")): - tag = None - else: - index = index + 1 - except IndexError: - tag = None # invalid path - for node in nodeset: - new = list(node.getiterator(tag)) - if new and new[0] is node: - set.extend(new[1:]) - else: - set.extend(new) - else: - for node in nodeset: - for node in node: - if path == "*" or node.tag == path: - set.append(node) - if not set: - return [] - nodeset = set - -_cache = {} - -## -# (Internal) Compile path. - -def _compile(path): - p = _cache.get(path) - if p is not None: - return p - p = Path(path) - if len(_cache) >= 100: - _cache.clear() - _cache[path] = p - return p - -## -# Find first matching object. - -def find(element, path): - return _compile(path).find(element) - -## -# Find text for first matching object. - -def findtext(element, path, default=None): - return _compile(path).findtext(element, default) - -## -# Find all matching objects. - -def findall(element, path): - return _compile(path).findall(element) - diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py deleted file mode 100644 index bd45251..0000000 --- a/Lib/xml/etree/ElementTree.py +++ /dev/null @@ -1,1254 +0,0 @@ -# -# ElementTree -# $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $ -# -# light-weight XML support for Python 1.5.2 and later. -# -# history: -# 2001-10-20 fl created (from various sources) -# 2001-11-01 fl return root from parse method -# 2002-02-16 fl sort attributes in lexical order -# 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup -# 2002-05-01 fl finished TreeBuilder refactoring -# 2002-07-14 fl added basic namespace support to ElementTree.write -# 2002-07-25 fl added QName attribute support -# 2002-10-20 fl fixed encoding in write -# 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding -# 2002-11-27 fl accept file objects or file names for parse/write -# 2002-12-04 fl moved XMLTreeBuilder back to this module -# 2003-01-11 fl fixed entity encoding glitch for us-ascii -# 2003-02-13 fl added XML literal factory -# 2003-02-21 fl added ProcessingInstruction/PI factory -# 2003-05-11 fl added tostring/fromstring helpers -# 2003-05-26 fl added ElementPath support -# 2003-07-05 fl added makeelement factory method -# 2003-07-28 fl added more well-known namespace prefixes -# 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch) -# 2003-09-04 fl fall back on emulator if ElementPath is not installed -# 2003-10-31 fl markup updates -# 2003-11-15 fl fixed nested namespace bug -# 2004-03-28 fl added XMLID helper -# 2004-06-02 fl added default support to findtext -# 2004-06-08 fl fixed encoding of non-ascii element/attribute names -# 2004-08-23 fl take advantage of post-2.1 expat features -# 2005-02-01 fl added iterparse implementation -# 2005-03-02 fl fixed iterparse support for pre-2.2 versions -# -# Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2005 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -__all__ = [ - # public symbols - "Comment", - "dump", - "Element", "ElementTree", - "fromstring", - "iselement", "iterparse", - "parse", - "PI", "ProcessingInstruction", - "QName", - "SubElement", - "tostring", - "TreeBuilder", - "VERSION", "XML", - "XMLTreeBuilder", - ] - -## -# The Element type is a flexible container object, designed to -# store hierarchical data structures in memory. The type can be -# described as a cross between a list and a dictionary. -#

-# Each element has a number of properties associated with it: -#

    -#
  • a tag. This is a string identifying what kind of data -# this element represents (the element type, in other words).
  • -#
  • a number of attributes, stored in a Python dictionary.
  • -#
  • a text string.
  • -#
  • an optional tail string.
  • -#
  • a number of child elements, stored in a Python sequence
  • -#
-# -# To create an element instance, use the {@link #Element} or {@link -# #SubElement} factory functions. -#

-# The {@link #ElementTree} class can be used to wrap an element -# structure, and convert it from and to XML. -## - -import string, sys, re - -class _SimpleElementPath: - # emulate pre-1.2 find/findtext/findall behaviour - def find(self, element, tag): - for elem in element: - if elem.tag == tag: - return elem - return None - def findtext(self, element, tag, default=None): - for elem in element: - if elem.tag == tag: - return elem.text or "" - return default - def findall(self, element, tag): - if tag[:3] == ".//": - return element.getiterator(tag[3:]) - result = [] - for elem in element: - if elem.tag == tag: - result.append(elem) - return result - -try: - import ElementPath -except ImportError: - # FIXME: issue warning in this case? - ElementPath = _SimpleElementPath() - -# TODO: add support for custom namespace resolvers/default namespaces -# TODO: add improved support for incremental parsing - -VERSION = "1.2.6" - -## -# Internal element class. This class defines the Element interface, -# and provides a reference implementation of this interface. -#

-# You should not create instances of this class directly. Use the -# appropriate factory functions instead, such as {@link #Element} -# and {@link #SubElement}. -# -# @see Element -# @see SubElement -# @see Comment -# @see ProcessingInstruction - -class _ElementInterface: - # text...tail - - ## - # (Attribute) Element tag. - - tag = None - - ## - # (Attribute) Element attribute dictionary. Where possible, use - # {@link #_ElementInterface.get}, - # {@link #_ElementInterface.set}, - # {@link #_ElementInterface.keys}, and - # {@link #_ElementInterface.items} to access - # element attributes. - - attrib = None - - ## - # (Attribute) Text before first subelement. This is either a - # string or the value None, if there was no text. - - text = None - - ## - # (Attribute) Text after this element's end tag, but before the - # next sibling element's start tag. This is either a string or - # the value None, if there was no text. - - tail = None # text after end tag, if any - - def __init__(self, tag, attrib): - self.tag = tag - self.attrib = attrib - self._children = [] - - def __repr__(self): - return "" % (self.tag, id(self)) - - ## - # Creates a new element object of the same type as this element. - # - # @param tag Element tag. - # @param attrib Element attributes, given as a dictionary. - # @return A new element instance. - - def makeelement(self, tag, attrib): - return Element(tag, attrib) - - ## - # Returns the number of subelements. - # - # @return The number of subelements. - - def __len__(self): - return len(self._children) - - ## - # Returns the given subelement. - # - # @param index What subelement to return. - # @return The given subelement. - # @exception IndexError If the given element does not exist. - - def __getitem__(self, index): - return self._children[index] - - ## - # Replaces the given subelement. - # - # @param index What subelement to replace. - # @param element The new element value. - # @exception IndexError If the given element does not exist. - # @exception AssertionError If element is not a valid object. - - def __setitem__(self, index, element): - assert iselement(element) - self._children[index] = element - - ## - # Deletes the given subelement. - # - # @param index What subelement to delete. - # @exception IndexError If the given element does not exist. - - def __delitem__(self, index): - del self._children[index] - - ## - # Returns a list containing subelements in the given range. - # - # @param start The first subelement to return. - # @param stop The first subelement that shouldn't be returned. - # @return A sequence object containing subelements. - - def __getslice__(self, start, stop): - return self._children[start:stop] - - ## - # Replaces a number of subelements with elements from a sequence. - # - # @param start The first subelement to replace. - # @param stop The first subelement that shouldn't be replaced. - # @param elements A sequence object with zero or more elements. - # @exception AssertionError If a sequence member is not a valid object. - - def __setslice__(self, start, stop, elements): - for element in elements: - assert iselement(element) - self._children[start:stop] = list(elements) - - ## - # Deletes a number of subelements. - # - # @param start The first subelement to delete. - # @param stop The first subelement to leave in there. - - def __delslice__(self, start, stop): - del self._children[start:stop] - - ## - # Adds a subelement to the end of this element. - # - # @param element The element to add. - # @exception AssertionError If a sequence member is not a valid object. - - def append(self, element): - assert iselement(element) - self._children.append(element) - - ## - # Inserts a subelement at the given position in this element. - # - # @param index Where to insert the new subelement. - # @exception AssertionError If the element is not a valid object. - - def insert(self, index, element): - assert iselement(element) - self._children.insert(index, element) - - ## - # Removes a matching subelement. Unlike the find methods, - # this method compares elements based on identity, not on tag - # value or contents. - # - # @param element What element to remove. - # @exception ValueError If a matching element could not be found. - # @exception AssertionError If the element is not a valid object. - - def remove(self, element): - assert iselement(element) - self._children.remove(element) - - ## - # Returns all subelements. The elements are returned in document - # order. - # - # @return A list of subelements. - # @defreturn list of Element instances - - def getchildren(self): - return self._children - - ## - # Finds the first matching subelement, by tag name or path. - # - # @param path What element to look for. - # @return The first matching element, or None if no element was found. - # @defreturn Element or None - - def find(self, path): - return ElementPath.find(self, path) - - ## - # Finds text for the first matching subelement, by tag name or path. - # - # @param path What element to look for. - # @param default What to return if the element was not found. - # @return The text content of the first matching element, or the - # default value no element was found. Note that if the element - # has is found, but has no text content, this method returns an - # empty string. - # @defreturn string - - def findtext(self, path, default=None): - return ElementPath.findtext(self, path, default) - - ## - # Finds all matching subelements, by tag name or path. - # - # @param path What element to look for. - # @return A list or iterator containing all matching elements, - # in document order. - # @defreturn list of Element instances - - def findall(self, path): - return ElementPath.findall(self, path) - - ## - # Resets an element. This function removes all subelements, clears - # all attributes, and sets the text and tail attributes to None. - - def clear(self): - self.attrib.clear() - self._children = [] - self.text = self.tail = None - - ## - # Gets an element attribute. - # - # @param key What attribute to look for. - # @param default What to return if the attribute was not found. - # @return The attribute value, or the default value, if the - # attribute was not found. - # @defreturn string or None - - def get(self, key, default=None): - return self.attrib.get(key, default) - - ## - # Sets an element attribute. - # - # @param key What attribute to set. - # @param value The attribute value. - - def set(self, key, value): - self.attrib[key] = value - - ## - # Gets a list of attribute names. The names are returned in an - # arbitrary order (just like for an ordinary Python dictionary). - # - # @return A list of element attribute names. - # @defreturn list of strings - - def keys(self): - return self.attrib.keys() - - ## - # Gets element attributes, as a sequence. The attributes are - # returned in an arbitrary order. - # - # @return A list of (name, value) tuples for all attributes. - # @defreturn list of (string, string) tuples - - def items(self): - return self.attrib.items() - - ## - # Creates a tree iterator. The iterator loops over this element - # and all subelements, in document order, and returns all elements - # with a matching tag. - #

- # If the tree structure is modified during iteration, the result - # is undefined. - # - # @param tag What tags to look for (default is to return all elements). - # @return A list or iterator containing all the matching elements. - # @defreturn list or iterator - - def getiterator(self, tag=None): - nodes = [] - if tag == "*": - tag = None - if tag is None or self.tag == tag: - nodes.append(self) - for node in self._children: - nodes.extend(node.getiterator(tag)) - return nodes - -# compatibility -_Element = _ElementInterface - -## -# Element factory. This function returns an object implementing the -# standard Element interface. The exact class or type of that object -# is implementation dependent, but it will always be compatible with -# the {@link #_ElementInterface} class in this module. -#

-# The element name, attribute names, and attribute values can be -# either 8-bit ASCII strings or Unicode strings. -# -# @param tag The element name. -# @param attrib An optional dictionary, containing element attributes. -# @param **extra Additional attributes, given as keyword arguments. -# @return An element instance. -# @defreturn Element - -def Element(tag, attrib={}, **extra): - attrib = attrib.copy() - attrib.update(extra) - return _ElementInterface(tag, attrib) - -## -# Subelement factory. This function creates an element instance, and -# appends it to an existing element. -#

-# The element name, attribute names, and attribute values can be -# either 8-bit ASCII strings or Unicode strings. -# -# @param parent The parent element. -# @param tag The subelement name. -# @param attrib An optional dictionary, containing element attributes. -# @param **extra Additional attributes, given as keyword arguments. -# @return An element instance. -# @defreturn Element - -def SubElement(parent, tag, attrib={}, **extra): - attrib = attrib.copy() - attrib.update(extra) - element = parent.makeelement(tag, attrib) - parent.append(element) - return element - -## -# Comment element factory. This factory function creates a special -# element that will be serialized as an XML comment. -#

-# The comment string can be either an 8-bit ASCII string or a Unicode -# string. -# -# @param text A string containing the comment string. -# @return An element instance, representing a comment. -# @defreturn Element - -def Comment(text=None): - element = Element(Comment) - element.text = text - return element - -## -# PI element factory. This factory function creates a special element -# that will be serialized as an XML processing instruction. -# -# @param target A string containing the PI target. -# @param text A string containing the PI contents, if any. -# @return An element instance, representing a PI. -# @defreturn Element - -def ProcessingInstruction(target, text=None): - element = Element(ProcessingInstruction) - element.text = target - if text: - element.text = element.text + " " + text - return element - -PI = ProcessingInstruction - -## -# QName wrapper. This can be used to wrap a QName attribute value, in -# order to get proper namespace handling on output. -# -# @param text A string containing the QName value, in the form {uri}local, -# or, if the tag argument is given, the URI part of a QName. -# @param tag Optional tag. If given, the first argument is interpreted as -# an URI, and this argument is interpreted as a local name. -# @return An opaque object, representing the QName. - -class QName: - def __init__(self, text_or_uri, tag=None): - if tag: - text_or_uri = "{%s}%s" % (text_or_uri, tag) - self.text = text_or_uri - def __str__(self): - return self.text - def __hash__(self): - return hash(self.text) - def __cmp__(self, other): - if isinstance(other, QName): - return cmp(self.text, other.text) - return cmp(self.text, other) - -## -# ElementTree wrapper class. This class represents an entire element -# hierarchy, and adds some extra support for serialization to and from -# standard XML. -# -# @param element Optional root element. -# @keyparam file Optional file handle or name. If given, the -# tree is initialized with the contents of this XML file. - -class ElementTree: - - def __init__(self, element=None, file=None): - assert element is None or iselement(element) - self._root = element # first node - if file: - self.parse(file) - - ## - # Gets the root element for this tree. - # - # @return An element instance. - # @defreturn Element - - def getroot(self): - return self._root - - ## - # Replaces the root element for this tree. This discards the - # current contents of the tree, and replaces it with the given - # element. Use with care. - # - # @param element An element instance. - - def _setroot(self, element): - assert iselement(element) - self._root = element - - ## - # Loads an external XML document into this element tree. - # - # @param source A file name or file object. - # @param parser An optional parser instance. If not given, the - # standard {@link XMLTreeBuilder} parser is used. - # @return The document root element. - # @defreturn Element - - def parse(self, source, parser=None): - if not hasattr(source, "read"): - source = open(source, "rb") - if not parser: - parser = XMLTreeBuilder() - while 1: - data = source.read(32768) - if not data: - break - parser.feed(data) - self._root = parser.close() - return self._root - - ## - # Creates a tree iterator for the root element. The iterator loops - # over all elements in this tree, in document order. - # - # @param tag What tags to look for (default is to return all elements) - # @return An iterator. - # @defreturn iterator - - def getiterator(self, tag=None): - assert self._root is not None - return self._root.getiterator(tag) - - ## - # Finds the first toplevel element with given tag. - # Same as getroot().find(path). - # - # @param path What element to look for. - # @return The first matching element, or None if no element was found. - # @defreturn Element or None - - def find(self, path): - assert self._root is not None - if path[:1] == "/": - path = "." + path - return self._root.find(path) - - ## - # Finds the element text for the first toplevel element with given - # tag. Same as getroot().findtext(path). - # - # @param path What toplevel element to look for. - # @param default What to return if the element was not found. - # @return The text content of the first matching element, or the - # default value no element was found. Note that if the element - # has is found, but has no text content, this method returns an - # empty string. - # @defreturn string - - def findtext(self, path, default=None): - assert self._root is not None - if path[:1] == "/": - path = "." + path - return self._root.findtext(path, default) - - ## - # Finds all toplevel elements with the given tag. - # Same as getroot().findall(path). - # - # @param path What element to look for. - # @return A list or iterator containing all matching elements, - # in document order. - # @defreturn list of Element instances - - def findall(self, path): - assert self._root is not None - if path[:1] == "/": - path = "." + path - return self._root.findall(path) - - ## - # Writes the element tree to a file, as XML. - # - # @param file A file name, or a file object opened for writing. - # @param encoding Optional output encoding (default is US-ASCII). - - def write(self, file, encoding="us-ascii"): - assert self._root is not None - if not hasattr(file, "write"): - file = open(file, "wb") - if not encoding: - encoding = "us-ascii" - elif encoding != "utf-8" and encoding != "us-ascii": - file.write("\n" % encoding) - self._write(file, self._root, encoding, {}) - - def _write(self, file, node, encoding, namespaces): - # write XML to file - tag = node.tag - if tag is Comment: - file.write("" % _escape_cdata(node.text, encoding)) - elif tag is ProcessingInstruction: - file.write("" % _escape_cdata(node.text, encoding)) - else: - items = node.items() - xmlns_items = [] # new namespaces in this scope - try: - if isinstance(tag, QName) or tag[:1] == "{": - tag, xmlns = fixtag(tag, namespaces) - if xmlns: xmlns_items.append(xmlns) - except TypeError: - _raise_serialization_error(tag) - file.write("<" + _encode(tag, encoding)) - if items or xmlns_items: - items.sort() # lexical order - for k, v in items: - try: - if isinstance(k, QName) or k[:1] == "{": - k, xmlns = fixtag(k, namespaces) - if xmlns: xmlns_items.append(xmlns) - except TypeError: - _raise_serialization_error(k) - try: - if isinstance(v, QName): - v, xmlns = fixtag(v, namespaces) - if xmlns: xmlns_items.append(xmlns) - except TypeError: - _raise_serialization_error(v) - file.write(" %s=\"%s\"" % (_encode(k, encoding), - _escape_attrib(v, encoding))) - for k, v in xmlns_items: - file.write(" %s=\"%s\"" % (_encode(k, encoding), - _escape_attrib(v, encoding))) - if node.text or len(node): - file.write(">") - if node.text: - file.write(_escape_cdata(node.text, encoding)) - for n in node: - self._write(file, n, encoding, namespaces) - file.write("") - else: - file.write(" />") - for k, v in xmlns_items: - del namespaces[v] - if node.tail: - file.write(_escape_cdata(node.tail, encoding)) - -# -------------------------------------------------------------------- -# helpers - -## -# Checks if an object appears to be a valid element object. -# -# @param An element instance. -# @return A true value if this is an element object. -# @defreturn flag - -def iselement(element): - # FIXME: not sure about this; might be a better idea to look - # for tag/attrib/text attributes - return isinstance(element, _ElementInterface) or hasattr(element, "tag") - -## -# Writes an element tree or element structure to sys.stdout. This -# function should be used for debugging only. -#

-# The exact output format is implementation dependent. In this -# version, it's written as an ordinary XML file. -# -# @param elem An element tree or an individual element. - -def dump(elem): - # debugging - if not isinstance(elem, ElementTree): - elem = ElementTree(elem) - elem.write(sys.stdout) - tail = elem.getroot().tail - if not tail or tail[-1] != "\n": - sys.stdout.write("\n") - -def _encode(s, encoding): - try: - return s.encode(encoding) - except AttributeError: - return s # 1.5.2: assume the string uses the right encoding - -if sys.version[:3] == "1.5": - _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2 -else: - _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"')) - -_escape_map = { - "&": "&", - "<": "<", - ">": ">", - '"': """, -} - -_namespace_map = { - # "well-known" namespace prefixes - "http://www.w3.org/XML/1998/namespace": "xml", - "http://www.w3.org/1999/xhtml": "html", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", - "http://schemas.xmlsoap.org/wsdl/": "wsdl", -} - -def _raise_serialization_error(text): - raise TypeError( - "cannot serialize %r (type %s)" % (text, type(text).__name__) - ) - -def _encode_entity(text, pattern=_escape): - # map reserved and non-ascii characters to numerical entities - def escape_entities(m, map=_escape_map): - out = [] - append = out.append - for char in m.group(): - text = map.get(char) - if text is None: - text = "&#%d;" % ord(char) - append(text) - return string.join(out, "") - try: - return _encode(pattern.sub(escape_entities, text), "ascii") - except TypeError: - _raise_serialization_error(text) - -# -# the following functions assume an ascii-compatible encoding -# (or "utf-16") - -def _escape_cdata(text, encoding=None, replace=string.replace): - # escape character data - try: - if encoding: - try: - text = _encode(text, encoding) - except UnicodeError: - return _encode_entity(text) - text = replace(text, "&", "&") - text = replace(text, "<", "<") - text = replace(text, ">", ">") - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib(text, encoding=None, replace=string.replace): - # escape attribute value - try: - if encoding: - try: - text = _encode(text, encoding) - except UnicodeError: - return _encode_entity(text) - text = replace(text, "&", "&") - text = replace(text, "'", "'") # FIXME: overkill - text = replace(text, "\"", """) - text = replace(text, "<", "<") - text = replace(text, ">", ">") - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def fixtag(tag, namespaces): - # given a decorated tag (of the form {uri}tag), return prefixed - # tag and namespace declaration, if any - if isinstance(tag, QName): - tag = tag.text - namespace_uri, tag = string.split(tag[1:], "}", 1) - prefix = namespaces.get(namespace_uri) - if prefix is None: - prefix = _namespace_map.get(namespace_uri) - if prefix is None: - prefix = "ns%d" % len(namespaces) - namespaces[namespace_uri] = prefix - if prefix == "xml": - xmlns = None - else: - xmlns = ("xmlns:%s" % prefix, namespace_uri) - else: - xmlns = None - return "%s:%s" % (prefix, tag), xmlns - -## -# Parses an XML document into an element tree. -# -# @param source A filename or file object containing XML data. -# @param parser An optional parser instance. If not given, the -# standard {@link XMLTreeBuilder} parser is used. -# @return An ElementTree instance - -def parse(source, parser=None): - tree = ElementTree() - tree.parse(source, parser) - return tree - -## -# Parses an XML document into an element tree incrementally, and reports -# what's going on to the user. -# -# @param source A filename or file object containing XML data. -# @param events A list of events to report back. If omitted, only "end" -# events are reported. -# @return A (event, elem) iterator. - -class iterparse: - - def __init__(self, source, events=None): - if not hasattr(source, "read"): - source = open(source, "rb") - self._file = source - self._events = [] - self._index = 0 - self.root = self._root = None - self._parser = XMLTreeBuilder() - # wire up the parser for event reporting - parser = self._parser._parser - append = self._events.append - if events is None: - events = ["end"] - for event in events: - if event == "start": - try: - parser.ordered_attributes = 1 - parser.specified_attributes = 1 - def handler(tag, attrib_in, event=event, append=append, - start=self._parser._start_list): - append((event, start(tag, attrib_in))) - parser.StartElementHandler = handler - except AttributeError: - def handler(tag, attrib_in, event=event, append=append, - start=self._parser._start): - append((event, start(tag, attrib_in))) - parser.StartElementHandler = handler - elif event == "end": - def handler(tag, event=event, append=append, - end=self._parser._end): - append((event, end(tag))) - parser.EndElementHandler = handler - elif event == "start-ns": - def handler(prefix, uri, event=event, append=append): - try: - uri = _encode(uri, "ascii") - except UnicodeError: - pass - append((event, (prefix or "", uri))) - parser.StartNamespaceDeclHandler = handler - elif event == "end-ns": - def handler(prefix, event=event, append=append): - append((event, None)) - parser.EndNamespaceDeclHandler = handler - - def next(self): - while 1: - try: - item = self._events[self._index] - except IndexError: - if self._parser is None: - self.root = self._root - try: - raise StopIteration - except NameError: - raise IndexError - # load event buffer - del self._events[:] - self._index = 0 - data = self._file.read(16384) - if data: - self._parser.feed(data) - else: - self._root = self._parser.close() - self._parser = None - else: - self._index = self._index + 1 - return item - - try: - iter - def __iter__(self): - return self - except NameError: - def __getitem__(self, index): - return self.next() - -## -# Parses an XML document from a string constant. This function can -# be used to embed "XML literals" in Python code. -# -# @param source A string containing XML data. -# @return An Element instance. -# @defreturn Element - -def XML(text): - parser = XMLTreeBuilder() - parser.feed(text) - return parser.close() - -## -# Parses an XML document from a string constant, and also returns -# a dictionary which maps from element id:s to elements. -# -# @param source A string containing XML data. -# @return A tuple containing an Element instance and a dictionary. -# @defreturn (Element, dictionary) - -def XMLID(text): - parser = XMLTreeBuilder() - parser.feed(text) - tree = parser.close() - ids = {} - for elem in tree.getiterator(): - id = elem.get("id") - if id: - ids[id] = elem - return tree, ids - -## -# Parses an XML document from a string constant. Same as {@link #XML}. -# -# @def fromstring(text) -# @param source A string containing XML data. -# @return An Element instance. -# @defreturn Element - -fromstring = XML - -## -# Generates a string representation of an XML element, including all -# subelements. -# -# @param element An Element instance. -# @return An encoded string containing the XML data. -# @defreturn string - -def tostring(element, encoding=None): - class dummy: - pass - data = [] - file = dummy() - file.write = data.append - ElementTree(element).write(file, encoding) - return string.join(data, "") - -## -# Generic element structure builder. This builder converts a sequence -# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link -# #TreeBuilder.end} method calls to a well-formed element structure. -#

-# You can use this class to build an element structure using a custom XML -# parser, or a parser for some other XML-like format. -# -# @param element_factory Optional element factory. This factory -# is called to create new Element instances, as necessary. - -class TreeBuilder: - - def __init__(self, element_factory=None): - self._data = [] # data collector - self._elem = [] # element stack - self._last = None # last element - self._tail = None # true if we're after an end tag - if element_factory is None: - element_factory = _ElementInterface - self._factory = element_factory - - ## - # Flushes the parser buffers, and returns the toplevel documen - # element. - # - # @return An Element instance. - # @defreturn Element - - def close(self): - assert len(self._elem) == 0, "missing end tags" - assert self._last != None, "missing toplevel element" - return self._last - - def _flush(self): - if self._data: - if self._last is not None: - text = string.join(self._data, "") - if self._tail: - assert self._last.tail is None, "internal error (tail)" - self._last.tail = text - else: - assert self._last.text is None, "internal error (text)" - self._last.text = text - self._data = [] - - ## - # Adds text to the current element. - # - # @param data A string. This should be either an 8-bit string - # containing ASCII text, or a Unicode string. - - def data(self, data): - self._data.append(data) - - ## - # Opens a new element. - # - # @param tag The element name. - # @param attrib A dictionary containing element attributes. - # @return The opened element. - # @defreturn Element - - def start(self, tag, attrs): - self._flush() - self._last = elem = self._factory(tag, attrs) - if self._elem: - self._elem[-1].append(elem) - self._elem.append(elem) - self._tail = 0 - return elem - - ## - # Closes the current element. - # - # @param tag The element name. - # @return The closed element. - # @defreturn Element - - def end(self, tag): - self._flush() - self._last = self._elem.pop() - assert self._last.tag == tag,\ - "end tag mismatch (expected %s, got %s)" % ( - self._last.tag, tag) - self._tail = 1 - return self._last - -## -# Element structure builder for XML source data, based on the -# expat parser. -# -# @keyparam target Target object. If omitted, the builder uses an -# instance of the standard {@link #TreeBuilder} class. -# @keyparam html Predefine HTML entities. This flag is not supported -# by the current implementation. -# @see #ElementTree -# @see #TreeBuilder - -class XMLTreeBuilder: - - def __init__(self, html=0, target=None): - try: - from xml.parsers import expat - except ImportError: - raise ImportError( - "No module named expat; use SimpleXMLTreeBuilder instead" - ) - self._parser = parser = expat.ParserCreate(None, "}") - if target is None: - target = TreeBuilder() - self._target = target - self._names = {} # name memo cache - # callbacks - parser.DefaultHandlerExpand = self._default - parser.StartElementHandler = self._start - parser.EndElementHandler = self._end - parser.CharacterDataHandler = self._data - # let expat do the buffering, if supported - try: - self._parser.buffer_text = 1 - except AttributeError: - pass - # use new-style attribute handling, if supported - try: - self._parser.ordered_attributes = 1 - self._parser.specified_attributes = 1 - parser.StartElementHandler = self._start_list - except AttributeError: - pass - encoding = None - if not parser.returns_unicode: - encoding = "utf-8" - # target.xml(encoding, None) - self._doctype = None - self.entity = {} - - def _fixtext(self, text): - # convert text string to ascii, if possible - try: - return _encode(text, "ascii") - except UnicodeError: - return text - - def _fixname(self, key): - # expand qname, and convert name string to ascii, if possible - try: - name = self._names[key] - except KeyError: - name = key - if "}" in name: - name = "{" + name - self._names[key] = name = self._fixtext(name) - return name - - def _start(self, tag, attrib_in): - fixname = self._fixname - tag = fixname(tag) - attrib = {} - for key, value in attrib_in.items(): - attrib[fixname(key)] = self._fixtext(value) - return self._target.start(tag, attrib) - - def _start_list(self, tag, attrib_in): - fixname = self._fixname - tag = fixname(tag) - attrib = {} - if attrib_in: - for i in range(0, len(attrib_in), 2): - attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1]) - return self._target.start(tag, attrib) - - def _data(self, text): - return self._target.data(self._fixtext(text)) - - def _end(self, tag): - return self._target.end(self._fixname(tag)) - - def _default(self, text): - prefix = text[:1] - if prefix == "&": - # deal with undefined entities - try: - self._target.data(self.entity[text[1:-1]]) - except KeyError: - from xml.parsers import expat - raise expat.error( - "undefined entity %s: line %d, column %d" % - (text, self._parser.ErrorLineNumber, - self._parser.ErrorColumnNumber) - ) - elif prefix == "<" and text[:9] == "": - self._doctype = None - return - text = string.strip(text) - if not text: - return - self._doctype.append(text) - n = len(self._doctype) - if n > 2: - type = self._doctype[1] - if type == "PUBLIC" and n == 4: - name, type, pubid, system = self._doctype - elif type == "SYSTEM" and n == 3: - name, type, system = self._doctype - pubid = None - else: - return - if pubid: - pubid = pubid[1:-1] - self.doctype(name, pubid, system[1:-1]) - self._doctype = None - - ## - # Handles a doctype declaration. - # - # @param name Doctype name. - # @param pubid Public identifier. - # @param system System identifier. - - def doctype(self, name, pubid, system): - pass - - ## - # Feeds data to the parser. - # - # @param data Encoded data. - - def feed(self, data): - self._parser.Parse(data, 0) - - ## - # Finishes feeding data to the parser. - # - # @return An element structure. - # @defreturn Element - - def close(self): - self._parser.Parse("", 1) # end of data - tree = self._target.close() - del self._target, self._parser # get rid of circular references - return tree diff --git a/Lib/xml/etree/__init__.py b/Lib/xml/etree/__init__.py deleted file mode 100644 index ef8d14c..0000000 --- a/Lib/xml/etree/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# $Id: __init__.py 1821 2004-06-03 16:57:49Z fredrik $ -# elementtree package - -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- diff --git a/Lib/xml/parsers/__init__.py b/Lib/xml/parsers/__init__.py deleted file mode 100644 index eb314a3..0000000 --- a/Lib/xml/parsers/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Python interfaces to XML parsers. - -This package contains one module: - -expat -- Python wrapper for James Clark's Expat parser, with namespace - support. - -""" diff --git a/Lib/xml/parsers/expat.py b/Lib/xml/parsers/expat.py deleted file mode 100644 index 11359a0..0000000 --- a/Lib/xml/parsers/expat.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Interface to the Expat non-validating XML parser.""" -__version__ = '$Revision$' - -from pyexpat import * diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py deleted file mode 100644 index 6b1b1ba..0000000 --- a/Lib/xml/sax/__init__.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Simple API for XML (SAX) implementation for Python. - -This module provides an implementation of the SAX 2 interface; -information about the Java version of the interface can be found at -http://www.megginson.com/SAX/. The Python version of the interface is -documented at <...>. - -This package contains the following modules: - -handler -- Base classes and constants which define the SAX 2 API for - the 'client-side' of SAX for Python. - -saxutils -- Implementation of the convenience classes commonly used to - work with SAX. - -xmlreader -- Base classes and constants which define the SAX 2 API for - the parsers used with SAX for Python. - -expatreader -- Driver that allows use of the Expat parser with SAX. -""" - -from xmlreader import InputSource -from handler import ContentHandler, ErrorHandler -from _exceptions import SAXException, SAXNotRecognizedException, \ - SAXParseException, SAXNotSupportedException, \ - SAXReaderNotAvailable - - -def parse(source, handler, errorHandler=ErrorHandler()): - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - parser.parse(source) - -def parseString(string, handler, errorHandler=ErrorHandler()): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - - if errorHandler is None: - errorHandler = ErrorHandler() - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - - inpsrc = InputSource() - inpsrc.setByteStream(StringIO(string)) - parser.parse(inpsrc) - -# this is the parser list used by the make_parser function if no -# alternatives are given as parameters to the function - -default_parser_list = ["xml.sax.expatreader"] - -# tell modulefinder that importing sax potentially imports expatreader -_false = 0 -if _false: - import xml.sax.expatreader - -import os, sys -if os.environ.has_key("PY_SAX_PARSER"): - default_parser_list = os.environ["PY_SAX_PARSER"].split(",") -del os - -_key = "python.xml.sax.parser" -if sys.platform[:4] == "java" and sys.registry.containsKey(_key): - default_parser_list = sys.registry.getProperty(_key).split(",") - - -def make_parser(parser_list = []): - """Creates and returns a SAX parser. - - Creates the first parser it is able to instantiate of the ones - given in the list created by doing parser_list + - default_parser_list. The lists must contain the names of Python - modules containing both a SAX parser and a create_parser function.""" - - for parser_name in parser_list + default_parser_list: - try: - return _create_parser(parser_name) - except ImportError,e: - import sys - if sys.modules.has_key(parser_name): - # The parser module was found, but importing it - # failed unexpectedly, pass this exception through - raise - except SAXReaderNotAvailable: - # The parser module detected that it won't work properly, - # so try the next one - pass - - raise SAXReaderNotAvailable("No parsers found", None) - -# --- Internal utility methods used by make_parser - -if sys.platform[ : 4] == "java": - def _create_parser(parser_name): - from org.python.core import imp - drv_module = imp.importName(parser_name, 0, globals()) - return drv_module.create_parser() - -else: - def _create_parser(parser_name): - drv_module = __import__(parser_name,{},{},['create_parser']) - return drv_module.create_parser() - -del sys diff --git a/Lib/xml/sax/_exceptions.py b/Lib/xml/sax/_exceptions.py deleted file mode 100644 index fdd614a..0000000 --- a/Lib/xml/sax/_exceptions.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Different kinds of SAX Exceptions""" -import sys -if sys.platform[:4] == "java": - from java.lang import Exception -del sys - -# ===== SAXEXCEPTION ===== - -class SAXException(Exception): - """Encapsulate an XML error or warning. This class can contain - basic error or warning information from either the XML parser or - the application: you can subclass it to provide additional - functionality, or to add localization. Note that although you will - receive a SAXException as the argument to the handlers in the - ErrorHandler interface, you are not actually required to throw - the exception; instead, you can simply read the information in - it.""" - - def __init__(self, msg, exception=None): - """Creates an exception. The message is required, but the exception - is optional.""" - self._msg = msg - self._exception = exception - Exception.__init__(self, msg) - - def getMessage(self): - "Return a message for this exception." - return self._msg - - def getException(self): - "Return the embedded exception, or None if there was none." - return self._exception - - def __str__(self): - "Create a string representation of the exception." - return self._msg - - def __getitem__(self, ix): - """Avoids weird error messages if someone does exception[ix] by - mistake, since Exception has __getitem__ defined.""" - raise AttributeError("__getitem__") - - -# ===== SAXPARSEEXCEPTION ===== - -class SAXParseException(SAXException): - """Encapsulate an XML parse error or warning. - - This exception will include information for locating the error in - the original XML document. Note that although the application will - receive a SAXParseException as the argument to the handlers in the - ErrorHandler interface, the application is not actually required - to throw the exception; instead, it can simply read the - information in it and take a different action. - - Since this exception is a subclass of SAXException, it inherits - the ability to wrap another exception.""" - - def __init__(self, msg, exception, locator): - "Creates the exception. The exception parameter is allowed to be None." - SAXException.__init__(self, msg, exception) - self._locator = locator - - # We need to cache this stuff at construction time. - # If this exception is thrown, the objects through which we must - # traverse to get this information may be deleted by the time - # it gets caught. - self._systemId = self._locator.getSystemId() - self._colnum = self._locator.getColumnNumber() - self._linenum = self._locator.getLineNumber() - - def getColumnNumber(self): - """The column number of the end of the text where the exception - occurred.""" - return self._colnum - - def getLineNumber(self): - "The line number of the end of the text where the exception occurred." - return self._linenum - - def getPublicId(self): - "Get the public identifier of the entity where the exception occurred." - return self._locator.getPublicId() - - def getSystemId(self): - "Get the system identifier of the entity where the exception occurred." - return self._systemId - - def __str__(self): - "Create a string representation of the exception." - sysid = self.getSystemId() - if sysid is None: - sysid = "" - linenum = self.getLineNumber() - if linenum is None: - linenum = "?" - colnum = self.getColumnNumber() - if colnum is None: - colnum = "?" - return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) - - -# ===== SAXNOTRECOGNIZEDEXCEPTION ===== - -class SAXNotRecognizedException(SAXException): - """Exception class for an unrecognized identifier. - - An XMLReader will raise this exception when it is confronted with an - unrecognized feature or property. SAX applications and extensions may - use this class for similar purposes.""" - - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXNotSupportedException(SAXException): - """Exception class for an unsupported operation. - - An XMLReader will raise this exception when a service it cannot - perform is requested (specifically setting a state or value). SAX - applications and extensions may use this class for similar - purposes.""" - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXReaderNotAvailable(SAXNotSupportedException): - """Exception class for a missing driver. - - An XMLReader module (driver) should raise this exception when it - is first imported, e.g. when a support module cannot be imported. - It also may be raised during parsing, e.g. if executing an external - program is not permitted.""" diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py deleted file mode 100644 index bb9c294..0000000 --- a/Lib/xml/sax/expatreader.py +++ /dev/null @@ -1,414 +0,0 @@ -""" -SAX driver for the pyexpat C module. This driver works with -pyexpat.__version__ == '2.22'. -""" - -version = "0.20" - -from xml.sax._exceptions import * -from xml.sax.handler import feature_validation, feature_namespaces -from xml.sax.handler import feature_namespace_prefixes -from xml.sax.handler import feature_external_ges, feature_external_pes -from xml.sax.handler import feature_string_interning -from xml.sax.handler import property_xml_string, property_interning_dict - -# xml.parsers.expat does not raise ImportError in Jython -import sys -if sys.platform[:4] == "java": - raise SAXReaderNotAvailable("expat not available in Java", None) -del sys - -try: - from xml.parsers import expat -except ImportError: - raise SAXReaderNotAvailable("expat not supported", None) -else: - if not hasattr(expat, "ParserCreate"): - raise SAXReaderNotAvailable("expat not supported", None) -from xml.sax import xmlreader, saxutils, handler - -AttributesImpl = xmlreader.AttributesImpl -AttributesNSImpl = xmlreader.AttributesNSImpl - -# If we're using a sufficiently recent version of Python, we can use -# weak references to avoid cycles between the parser and content -# handler, otherwise we'll just have to pretend. -try: - import _weakref -except ImportError: - def _mkproxy(o): - return o -else: - import weakref - _mkproxy = weakref.proxy - del weakref, _weakref - -# --- ExpatLocator - -class ExpatLocator(xmlreader.Locator): - """Locator for use with the ExpatParser class. - - This uses a weak reference to the parser object to avoid creating - a circular reference between the parser and the content handler. - """ - def __init__(self, parser): - self._ref = _mkproxy(parser) - - def getColumnNumber(self): - parser = self._ref - if parser._parser is None: - return None - return parser._parser.ErrorColumnNumber - - def getLineNumber(self): - parser = self._ref - if parser._parser is None: - return 1 - return parser._parser.ErrorLineNumber - - def getPublicId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getPublicId() - - def getSystemId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getSystemId() - - -# --- ExpatParser - -class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): - """SAX driver for the pyexpat C module.""" - - def __init__(self, namespaceHandling=0, bufsize=2**16-20): - xmlreader.IncrementalParser.__init__(self, bufsize) - self._source = xmlreader.InputSource() - self._parser = None - self._namespaces = namespaceHandling - self._lex_handler_prop = None - self._parsing = 0 - self._entity_stack = [] - self._external_ges = 1 - self._interning = None - - # XMLReader methods - - def parse(self, source): - "Parse an XML document from a URL or an InputSource." - source = saxutils.prepare_input_source(source) - - self._source = source - self.reset() - self._cont_handler.setDocumentLocator(ExpatLocator(self)) - xmlreader.IncrementalParser.parse(self, source) - - def prepareParser(self, source): - if source.getSystemId() != None: - self._parser.SetBase(source.getSystemId()) - - # Redefined setContentHandler to allow changing handlers during parsing - - def setContentHandler(self, handler): - xmlreader.IncrementalParser.setContentHandler(self, handler) - if self._parsing: - self._reset_cont_handler() - - def getFeature(self, name): - if name == feature_namespaces: - return self._namespaces - elif name == feature_string_interning: - return self._interning is not None - elif name in (feature_validation, feature_external_pes, - feature_namespace_prefixes): - return 0 - elif name == feature_external_ges: - return self._external_ges - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - if self._parsing: - raise SAXNotSupportedException("Cannot set features while parsing") - - if name == feature_namespaces: - self._namespaces = state - elif name == feature_external_ges: - self._external_ges = state - elif name == feature_string_interning: - if state: - if self._interning is None: - self._interning = {} - else: - self._interning = None - elif name == feature_validation: - if state: - raise SAXNotSupportedException( - "expat does not support validation") - elif name == feature_external_pes: - if state: - raise SAXNotSupportedException( - "expat does not read external parameter entities") - elif name == feature_namespace_prefixes: - if state: - raise SAXNotSupportedException( - "expat does not report namespace prefixes") - else: - raise SAXNotRecognizedException( - "Feature '%s' not recognized" % name) - - def getProperty(self, name): - if name == handler.property_lexical_handler: - return self._lex_handler_prop - elif name == property_interning_dict: - return self._interning - elif name == property_xml_string: - if self._parser: - if hasattr(self._parser, "GetInputContext"): - return self._parser.GetInputContext() - else: - raise SAXNotRecognizedException( - "This version of expat does not support getting" - " the XML string") - else: - raise SAXNotSupportedException( - "XML string cannot be returned when not parsing") - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - if name == handler.property_lexical_handler: - self._lex_handler_prop = value - if self._parsing: - self._reset_lex_handler_prop() - elif name == property_interning_dict: - self._interning = value - elif name == property_xml_string: - raise SAXNotSupportedException("Property '%s' cannot be set" % - name) - else: - raise SAXNotRecognizedException("Property '%s' not recognized" % - name) - - # IncrementalParser methods - - def feed(self, data, isFinal = 0): - if not self._parsing: - self.reset() - self._parsing = 1 - self._cont_handler.startDocument() - - try: - # The isFinal parameter is internal to the expat reader. - # If it is set to true, expat will check validity of the entire - # document. When feeding chunks, they are not normally final - - # except when invoked from close. - self._parser.Parse(data, isFinal) - except expat.error, e: - exc = SAXParseException(expat.ErrorString(e.code), e, self) - # FIXME: when to invoke error()? - self._err_handler.fatalError(exc) - - def close(self): - if self._entity_stack: - # If we are completing an external entity, do nothing here - return - self.feed("", isFinal = 1) - self._cont_handler.endDocument() - self._parsing = 0 - # break cycle created by expat handlers pointing to our methods - self._parser = None - - def _reset_cont_handler(self): - self._parser.ProcessingInstructionHandler = \ - self._cont_handler.processingInstruction - self._parser.CharacterDataHandler = self._cont_handler.characters - - def _reset_lex_handler_prop(self): - lex = self._lex_handler_prop - parser = self._parser - if lex is None: - parser.CommentHandler = None - parser.StartCdataSectionHandler = None - parser.EndCdataSectionHandler = None - parser.StartDoctypeDeclHandler = None - parser.EndDoctypeDeclHandler = None - else: - parser.CommentHandler = lex.comment - parser.StartCdataSectionHandler = lex.startCDATA - parser.EndCdataSectionHandler = lex.endCDATA - parser.StartDoctypeDeclHandler = self.start_doctype_decl - parser.EndDoctypeDeclHandler = lex.endDTD - - def reset(self): - if self._namespaces: - self._parser = expat.ParserCreate(self._source.getEncoding(), " ", - intern=self._interning) - self._parser.namespace_prefixes = 1 - self._parser.StartElementHandler = self.start_element_ns - self._parser.EndElementHandler = self.end_element_ns - else: - self._parser = expat.ParserCreate(self._source.getEncoding(), - intern = self._interning) - self._parser.StartElementHandler = self.start_element - self._parser.EndElementHandler = self.end_element - - self._reset_cont_handler() - self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl - self._parser.NotationDeclHandler = self.notation_decl - self._parser.StartNamespaceDeclHandler = self.start_namespace_decl - self._parser.EndNamespaceDeclHandler = self.end_namespace_decl - - self._decl_handler_prop = None - if self._lex_handler_prop: - self._reset_lex_handler_prop() -# self._parser.DefaultHandler = -# self._parser.DefaultHandlerExpand = -# self._parser.NotStandaloneHandler = - self._parser.ExternalEntityRefHandler = self.external_entity_ref - try: - self._parser.SkippedEntityHandler = self.skipped_entity_handler - except AttributeError: - # This pyexpat does not support SkippedEntity - pass - self._parser.SetParamEntityParsing( - expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - - self._parsing = 0 - self._entity_stack = [] - - # Locator methods - - def getColumnNumber(self): - if self._parser is None: - return None - return self._parser.ErrorColumnNumber - - def getLineNumber(self): - if self._parser is None: - return 1 - return self._parser.ErrorLineNumber - - def getPublicId(self): - return self._source.getPublicId() - - def getSystemId(self): - return self._source.getSystemId() - - # event handlers - def start_element(self, name, attrs): - self._cont_handler.startElement(name, AttributesImpl(attrs)) - - def end_element(self, name): - self._cont_handler.endElement(name) - - def start_element_ns(self, name, attrs): - pair = name.split() - if len(pair) == 1: - # no namespace - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - # default namespace - pair = tuple(pair) - - newattrs = {} - qnames = {} - for (aname, value) in attrs.items(): - parts = aname.split() - length = len(parts) - if length == 1: - # no namespace - qname = aname - apair = (None, aname) - elif length == 3: - qname = "%s:%s" % (parts[2], parts[1]) - apair = parts[0], parts[1] - else: - # default namespace - qname = parts[1] - apair = tuple(parts) - - newattrs[apair] = value - qnames[apair] = qname - - self._cont_handler.startElementNS(pair, None, - AttributesNSImpl(newattrs, qnames)) - - def end_element_ns(self, name): - pair = name.split() - if len(pair) == 1: - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - pair = tuple(pair) - - self._cont_handler.endElementNS(pair, None) - - # this is not used (call directly to ContentHandler) - def processing_instruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - # this is not used (call directly to ContentHandler) - def character_data(self, data): - self._cont_handler.characters(data) - - def start_namespace_decl(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def end_namespace_decl(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): - self._lex_handler_prop.startDTD(name, pubid, sysid) - - def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): - self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) - - def notation_decl(self, name, base, sysid, pubid): - self._dtd_handler.notationDecl(name, pubid, sysid) - - def external_entity_ref(self, context, base, sysid, pubid): - if not self._external_ges: - return 1 - - source = self._ent_handler.resolveEntity(pubid, sysid) - source = saxutils.prepare_input_source(source, - self._source.getSystemId() or - "") - - self._entity_stack.append((self._parser, self._source)) - self._parser = self._parser.ExternalEntityParserCreate(context) - self._source = source - - try: - xmlreader.IncrementalParser.parse(self, source) - except: - return 0 # FIXME: save error info here? - - (self._parser, self._source) = self._entity_stack[-1] - del self._entity_stack[-1] - return 1 - - def skipped_entity_handler(self, name, is_pe): - if is_pe: - # The SAX spec requires to report skipped PEs with a '%' - name = '%'+name - self._cont_handler.skippedEntity(name) - -# --- - -def create_parser(*args, **kwargs): - return ExpatParser(*args, **kwargs) - -# --- - -if __name__ == "__main__": - import xml.sax - p = create_parser() - p.setContentHandler(xml.sax.XMLGenerator()) - p.setErrorHandler(xml.sax.ErrorHandler()) - p.parse("../../../hamlet.xml") diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py deleted file mode 100644 index f9e91b6..0000000 --- a/Lib/xml/sax/handler.py +++ /dev/null @@ -1,342 +0,0 @@ -""" -This module contains the core classes of version 2.0 of SAX for Python. -This file provides only default classes with absolutely minimum -functionality, from which drivers and applications can be subclassed. - -Many of these classes are empty and are included only as documentation -of the interfaces. - -$Id$ -""" - -version = '2.0beta' - -#============================================================================ -# -# HANDLER INTERFACES -# -#============================================================================ - -# ===== ERRORHANDLER ===== - -class ErrorHandler: - """Basic interface for SAX error handlers. - - If you create an object that implements this interface, then - register the object with your XMLReader, the parser will call the - methods in your object to report all warnings and errors. There - are three levels of errors available: warnings, (possibly) - recoverable errors, and unrecoverable errors. All methods take a - SAXParseException as the only parameter.""" - - def error(self, exception): - "Handle a recoverable error." - raise exception - - def fatalError(self, exception): - "Handle a non-recoverable error." - raise exception - - def warning(self, exception): - "Handle a warning." - print exception - - -# ===== CONTENTHANDLER ===== - -class ContentHandler: - """Interface for receiving logical document content events. - - This is the main callback interface in SAX, and the one most - important to applications. The order of events in this interface - mirrors the order of the information in the document.""" - - def __init__(self): - self._locator = None - - def setDocumentLocator(self, locator): - """Called by the parser to give the application a locator for - locating the origin of document events. - - SAX parsers are strongly encouraged (though not absolutely - required) to supply a locator: if it does so, it must supply - the locator to the application by invoking this method before - invoking any of the other methods in the DocumentHandler - interface. - - The locator allows the application to determine the end - position of any document-related event, even if the parser is - not reporting an error. Typically, the application will use - this information for reporting its own errors (such as - character content that does not match an application's - business rules). The information returned by the locator is - probably not sufficient for use with a search engine. - - Note that the locator will return correct information only - during the invocation of the events in this interface. The - application should not attempt to use it at any other time.""" - self._locator = locator - - def startDocument(self): - """Receive notification of the beginning of a document. - - The SAX parser will invoke this method only once, before any - other methods in this interface or in DTDHandler (except for - setDocumentLocator).""" - - def endDocument(self): - """Receive notification of the end of a document. - - The SAX parser will invoke this method only once, and it will - be the last method invoked during the parse. The parser shall - not invoke this method until it has either abandoned parsing - (because of an unrecoverable error) or reached the end of - input.""" - - def startPrefixMapping(self, prefix, uri): - """Begin the scope of a prefix-URI Namespace mapping. - - The information from this event is not necessary for normal - Namespace processing: the SAX XML reader will automatically - replace prefixes for element and attribute names when the - http://xml.org/sax/features/namespaces feature is true (the - default). - - There are cases, however, when applications need to use - prefixes in character data or in attribute values, where they - cannot safely be expanded automatically; the - start/endPrefixMapping event supplies the information to the - application to expand prefixes in those contexts itself, if - necessary. - - Note that start/endPrefixMapping events are not guaranteed to - be properly nested relative to each-other: all - startPrefixMapping events will occur before the corresponding - startElement event, and all endPrefixMapping events will occur - after the corresponding endElement event, but their order is - not guaranteed.""" - - def endPrefixMapping(self, prefix): - """End the scope of a prefix-URI mapping. - - See startPrefixMapping for details. This event will always - occur after the corresponding endElement event, but the order - of endPrefixMapping events is not otherwise guaranteed.""" - - def startElement(self, name, attrs): - """Signals the start of an element in non-namespace mode. - - The name parameter contains the raw XML 1.0 name of the - element type as a string and the attrs parameter holds an - instance of the Attributes class containing the attributes of - the element.""" - - def endElement(self, name): - """Signals the end of an element in non-namespace mode. - - The name parameter contains the name of the element type, just - as with the startElement event.""" - - def startElementNS(self, name, qname, attrs): - """Signals the start of an element in namespace mode. - - The name parameter contains the name of the element type as a - (uri, localname) tuple, the qname parameter the raw XML 1.0 - name used in the source document, and the attrs parameter - holds an instance of the Attributes class containing the - attributes of the element. - - The uri part of the name tuple is None for elements which have - no namespace.""" - - def endElementNS(self, name, qname): - """Signals the end of an element in namespace mode. - - The name parameter contains the name of the element type, just - as with the startElementNS event.""" - - def characters(self, content): - """Receive notification of character data. - - The Parser will call this method to report each chunk of - character data. SAX parsers may return all contiguous - character data in a single chunk, or they may split it into - several chunks; however, all of the characters in any single - event must come from the same external entity so that the - Locator provides useful information.""" - - def ignorableWhitespace(self, whitespace): - """Receive notification of ignorable whitespace in element content. - - Validating Parsers must use this method to report each chunk - of ignorable whitespace (see the W3C XML 1.0 recommendation, - section 2.10): non-validating parsers may also use this method - if they are capable of parsing and using content models. - - SAX parsers may return all contiguous whitespace in a single - chunk, or they may split it into several chunks; however, all - of the characters in any single event must come from the same - external entity, so that the Locator provides useful - information.""" - - def processingInstruction(self, target, data): - """Receive notification of a processing instruction. - - The Parser will invoke this method once for each processing - instruction found: note that processing instructions may occur - before or after the main document element. - - A SAX parser should never report an XML declaration (XML 1.0, - section 2.8) or a text declaration (XML 1.0, section 4.3.1) - using this method.""" - - def skippedEntity(self, name): - """Receive notification of a skipped entity. - - The Parser will invoke this method once for each entity - skipped. Non-validating processors may skip entities if they - have not seen the declarations (because, for example, the - entity was declared in an external DTD subset). All processors - may skip external entities, depending on the values of the - http://xml.org/sax/features/external-general-entities and the - http://xml.org/sax/features/external-parameter-entities - properties.""" - - -# ===== DTDHandler ===== - -class DTDHandler: - """Handle DTD events. - - This interface specifies only those DTD events required for basic - parsing (unparsed entities and attributes).""" - - def notationDecl(self, name, publicId, systemId): - "Handle a notation declaration event." - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - "Handle an unparsed entity declaration event." - - -# ===== ENTITYRESOLVER ===== - -class EntityResolver: - """Basic interface for resolving entities. If you create an object - implementing this interface, then register the object with your - Parser, the parser will call the method in your object to - resolve all external entities. Note that DefaultHandler implements - this interface with the default behaviour.""" - - def resolveEntity(self, publicId, systemId): - """Resolve the system identifier of an entity and return either - the system identifier to read from as a string, or an InputSource - to read from.""" - return systemId - - -#============================================================================ -# -# CORE FEATURES -# -#============================================================================ - -feature_namespaces = "http://xml.org/sax/features/namespaces" -# true: Perform Namespace processing (default). -# false: Optionally do not perform Namespace processing -# (implies namespace-prefixes). -# access: (parsing) read-only; (not parsing) read/write - -feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" -# true: Report the original prefixed names and attributes used for Namespace -# declarations. -# false: Do not report attributes used for Namespace declarations, and -# optionally do not report original prefixed names (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_string_interning = "http://xml.org/sax/features/string-interning" -# true: All element names, prefixes, attribute names, Namespace URIs, and -# local names are interned using the built-in intern function. -# false: Names are not necessarily interned, although they may be (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_validation = "http://xml.org/sax/features/validation" -# true: Report all validation errors (implies external-general-entities and -# external-parameter-entities). -# false: Do not report validation errors. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_ges = "http://xml.org/sax/features/external-general-entities" -# true: Include all external general (text) entities. -# false: Do not include external general entities. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" -# true: Include all external parameter entities, including the external -# DTD subset. -# false: Do not include any external parameter entities, even the external -# DTD subset. -# access: (parsing) read-only; (not parsing) read/write - -all_features = [feature_namespaces, - feature_namespace_prefixes, - feature_string_interning, - feature_validation, - feature_external_ges, - feature_external_pes] - - -#============================================================================ -# -# CORE PROPERTIES -# -#============================================================================ - -property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" -# data type: xml.sax.sax2lib.LexicalHandler -# description: An optional extension handler for lexical events like comments. -# access: read/write - -property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" -# data type: xml.sax.sax2lib.DeclHandler -# description: An optional extension handler for DTD-related events other -# than notations and unparsed entities. -# access: read/write - -property_dom_node = "http://xml.org/sax/properties/dom-node" -# data type: org.w3c.dom.Node -# description: When parsing, the current DOM node being visited if this is -# a DOM iterator; when not parsing, the root DOM node for -# iteration. -# access: (parsing) read-only; (not parsing) read/write - -property_xml_string = "http://xml.org/sax/properties/xml-string" -# data type: String -# description: The literal string of characters that was the source for -# the current event. -# access: read-only - -property_encoding = "http://www.python.org/sax/properties/encoding" -# data type: String -# description: The name of the encoding to assume for input data. -# access: write: set the encoding, e.g. established by a higher-level -# protocol. May change during parsing (e.g. after -# processing a META tag) -# read: return the current encoding (possibly established through -# auto-detection. -# initial value: UTF-8 -# - -property_interning_dict = "http://www.python.org/sax/properties/interning-dict" -# data type: Dictionary -# description: The dictionary used to intern common strings in the document -# access: write: Request that the parser uses a specific dictionary, to -# allow interning across different documents -# read: return the current interning dictionary, or None -# - -all_properties = [property_lexical_handler, - property_dom_node, - property_declaration_handler, - property_xml_string, - property_encoding, - property_interning_dict] diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py deleted file mode 100644 index 582b008..0000000 --- a/Lib/xml/sax/saxutils.py +++ /dev/null @@ -1,297 +0,0 @@ -"""\ -A library of useful helper classes to the SAX classes, for the -convenience of application and driver writers. -""" - -import os, urlparse, urllib, types -import handler -import xmlreader - -try: - _StringTypes = [types.StringType, types.UnicodeType] -except AttributeError: - _StringTypes = [types.StringType] - -# See whether the xmlcharrefreplace error handler is -# supported -try: - from codecs import xmlcharrefreplace_errors - _error_handling = "xmlcharrefreplace" - del xmlcharrefreplace_errors -except ImportError: - _error_handling = "strict" - -def __dict_replace(s, d): - """Replace substrings of a string using a dictionary.""" - for key, value in d.items(): - s = s.replace(key, value) - return s - -def escape(data, entities={}): - """Escape &, <, and > in a string of data. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - - # must do ampersand first - data = data.replace("&", "&") - data = data.replace(">", ">") - data = data.replace("<", "<") - if entities: - data = __dict_replace(data, entities) - return data - -def unescape(data, entities={}): - """Unescape &, <, and > in a string of data. - - You can unescape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = data.replace("<", "<") - data = data.replace(">", ">") - if entities: - data = __dict_replace(data, entities) - # must do ampersand last - return data.replace("&", "&") - -def quoteattr(data, entities={}): - """Escape and quote an attribute value. - - Escape &, <, and > in a string of data, then quote it for use as - an attribute value. The \" character will be escaped as well, if - necessary. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = escape(data, entities) - if '"' in data: - if "'" in data: - data = '"%s"' % data.replace('"', """) - else: - data = "'%s'" % data - else: - data = '"%s"' % data - return data - - -class XMLGenerator(handler.ContentHandler): - - def __init__(self, out=None, encoding="iso-8859-1"): - if out is None: - import sys - out = sys.stdout - handler.ContentHandler.__init__(self) - self._out = out - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self._undeclared_ns_maps = [] - self._encoding = encoding - - def _write(self, text): - if isinstance(text, str): - self._out.write(text) - else: - self._out.write(text.encode(self._encoding, _error_handling)) - - # ContentHandler methods - - def startDocument(self): - self._write('\n' % - self._encoding) - - def startPrefixMapping(self, prefix, uri): - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix - self._undeclared_ns_maps.append((prefix, uri)) - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - - def startElement(self, name, attrs): - self._write('<' + name) - for (name, value) in attrs.items(): - self._write(' %s=%s' % (name, quoteattr(value))) - self._write('>') - - def endElement(self, name): - self._write('' % name) - - def startElementNS(self, name, qname, attrs): - if name[0] is None: - # if the name was not namespace-scoped, use the unqualified part - name = name[1] - else: - # else try to restore the original prefix from the namespace - name = self._current_context[name[0]] + ":" + name[1] - self._write('<' + name) - - for pair in self._undeclared_ns_maps: - self._write(' xmlns:%s="%s"' % pair) - self._undeclared_ns_maps = [] - - for (name, value) in attrs.items(): - name = self._current_context[name[0]] + ":" + name[1] - self._write(' %s=%s' % (name, quoteattr(value))) - self._write('>') - - def endElementNS(self, name, qname): - if name[0] is None: - name = name[1] - else: - name = self._current_context[name[0]] + ":" + name[1] - self._write('' % name) - - def characters(self, content): - self._write(escape(content)) - - def ignorableWhitespace(self, content): - self._write(content) - - def processingInstruction(self, target, data): - self._write('' % (target, data)) - - -class XMLFilterBase(xmlreader.XMLReader): - """This class is designed to sit between an XMLReader and the - client application's event handlers. By default, it does nothing - but pass requests up to the reader and events on to the handlers - unmodified, but subclasses can override specific methods to modify - the event stream or the configuration requests as they pass - through.""" - - def __init__(self, parent = None): - xmlreader.XMLReader.__init__(self) - self._parent = parent - - # ErrorHandler methods - - def error(self, exception): - self._err_handler.error(exception) - - def fatalError(self, exception): - self._err_handler.fatalError(exception) - - def warning(self, exception): - self._err_handler.warning(exception) - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self._cont_handler.setDocumentLocator(locator) - - def startDocument(self): - self._cont_handler.startDocument() - - def endDocument(self): - self._cont_handler.endDocument() - - def startPrefixMapping(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def endPrefixMapping(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def startElement(self, name, attrs): - self._cont_handler.startElement(name, attrs) - - def endElement(self, name): - self._cont_handler.endElement(name) - - def startElementNS(self, name, qname, attrs): - self._cont_handler.startElementNS(name, qname, attrs) - - def endElementNS(self, name, qname): - self._cont_handler.endElementNS(name, qname) - - def characters(self, content): - self._cont_handler.characters(content) - - def ignorableWhitespace(self, chars): - self._cont_handler.ignorableWhitespace(chars) - - def processingInstruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - def skippedEntity(self, name): - self._cont_handler.skippedEntity(name) - - # DTDHandler methods - - def notationDecl(self, name, publicId, systemId): - self._dtd_handler.notationDecl(name, publicId, systemId) - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) - - # EntityResolver methods - - def resolveEntity(self, publicId, systemId): - return self._ent_handler.resolveEntity(publicId, systemId) - - # XMLReader methods - - def parse(self, source): - self._parent.setContentHandler(self) - self._parent.setErrorHandler(self) - self._parent.setEntityResolver(self) - self._parent.setDTDHandler(self) - self._parent.parse(source) - - def setLocale(self, locale): - self._parent.setLocale(locale) - - def getFeature(self, name): - return self._parent.getFeature(name) - - def setFeature(self, name, state): - self._parent.setFeature(name, state) - - def getProperty(self, name): - return self._parent.getProperty(name) - - def setProperty(self, name, value): - self._parent.setProperty(name, value) - - # XMLFilter methods - - def getParent(self): - return self._parent - - def setParent(self, parent): - self._parent = parent - -# --- Utility functions - -def prepare_input_source(source, base = ""): - """This function takes an InputSource and an optional base URL and - returns a fully resolved InputSource object ready for reading.""" - - if type(source) in _StringTypes: - source = xmlreader.InputSource(source) - elif hasattr(source, "read"): - f = source - source = xmlreader.InputSource() - source.setByteStream(f) - if hasattr(f, "name"): - source.setSystemId(f.name) - - if source.getByteStream() is None: - sysid = source.getSystemId() - basehead = os.path.dirname(os.path.normpath(base)) - sysidfilename = os.path.join(basehead, sysid) - if os.path.isfile(sysidfilename): - source.setSystemId(sysidfilename) - f = open(sysidfilename, "rb") - else: - source.setSystemId(urlparse.urljoin(base, sysid)) - f = urllib.urlopen(source.getSystemId()) - - source.setByteStream(f) - - return source diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py deleted file mode 100644 index 9a2361e..0000000 --- a/Lib/xml/sax/xmlreader.py +++ /dev/null @@ -1,381 +0,0 @@ -"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers -should be based on this code. """ - -import handler - -from _exceptions import SAXNotSupportedException, SAXNotRecognizedException - - -# ===== XMLREADER ===== - -class XMLReader: - """Interface for reading an XML document using callbacks. - - XMLReader is the interface that an XML parser's SAX2 driver must - implement. This interface allows an application to set and query - features and properties in the parser, to register event handlers - for document processing, and to initiate a document parse. - - All SAX interfaces are assumed to be synchronous: the parse - methods must not return until parsing is complete, and readers - must wait for an event-handler callback to return before reporting - the next event.""" - - def __init__(self): - self._cont_handler = handler.ContentHandler() - self._dtd_handler = handler.DTDHandler() - self._ent_handler = handler.EntityResolver() - self._err_handler = handler.ErrorHandler() - - def parse(self, source): - "Parse an XML document from a system identifier or an InputSource." - raise NotImplementedError("This method must be implemented!") - - def getContentHandler(self): - "Returns the current ContentHandler." - return self._cont_handler - - def setContentHandler(self, handler): - "Registers a new object to receive document content events." - self._cont_handler = handler - - def getDTDHandler(self): - "Returns the current DTD handler." - return self._dtd_handler - - def setDTDHandler(self, handler): - "Register an object to receive basic DTD-related events." - self._dtd_handler = handler - - def getEntityResolver(self): - "Returns the current EntityResolver." - return self._ent_handler - - def setEntityResolver(self, resolver): - "Register an object to resolve external entities." - self._ent_handler = resolver - - def getErrorHandler(self): - "Returns the current ErrorHandler." - return self._err_handler - - def setErrorHandler(self, handler): - "Register an object to receive error-message events." - self._err_handler = handler - - def setLocale(self, locale): - """Allow an application to set the locale for errors and warnings. - - SAX parsers are not required to provide localization for errors - and warnings; if they cannot support the requested locale, - however, they must throw a SAX exception. Applications may - request a locale change in the middle of a parse.""" - raise SAXNotSupportedException("Locale support not implemented") - - def getFeature(self, name): - "Looks up and returns the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - "Sets the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def getProperty(self, name): - "Looks up and returns the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - "Sets the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - -class IncrementalParser(XMLReader): - """This interface adds three extra methods to the XMLReader - interface that allow XML parsers to support incremental - parsing. Support for this interface is optional, since not all - underlying XML parsers support this functionality. - - When the parser is instantiated it is ready to begin accepting - data from the feed method immediately. After parsing has been - finished with a call to close the reset method must be called to - make the parser ready to accept new data, either from feed or - using the parse method. - - Note that these methods must _not_ be called during parsing, that - is, after parse has been called and before it returns. - - By default, the class also implements the parse method of the XMLReader - interface using the feed, close and reset methods of the - IncrementalParser interface as a convenience to SAX 2.0 driver - writers.""" - - def __init__(self, bufsize=2**16): - self._bufsize = bufsize - XMLReader.__init__(self) - - def parse(self, source): - import saxutils - source = saxutils.prepare_input_source(source) - - self.prepareParser(source) - file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer != "": - self.feed(buffer) - buffer = file.read(self._bufsize) - self.close() - - def feed(self, data): - """This method gives the raw XML data in the data parameter to - the parser and makes it parse the data, emitting the - corresponding events. It is allowed for XML constructs to be - split across several calls to feed. - - feed may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def prepareParser(self, source): - """This method is called by the parse implementation to allow - the SAX 2.0 driver to prepare itself for parsing.""" - raise NotImplementedError("prepareParser must be overridden!") - - def close(self): - """This method is called when the entire XML document has been - passed to the parser through the feed method, to notify the - parser that there are no more data. This allows the parser to - do the final checks on the document and empty the internal - data buffer. - - The parser will not be ready to parse another document until - the reset method has been called. - - close may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def reset(self): - """This method is called after close has been called to reset - the parser so that it is ready to parse new documents. The - results of calling parse or feed after close without calling - reset are undefined.""" - raise NotImplementedError("This method must be implemented!") - -# ===== LOCATOR ===== - -class Locator: - """Interface for associating a SAX event with a document - location. A locator object will return valid results only during - calls to DocumentHandler methods; at any other time, the - results are unpredictable.""" - - def getColumnNumber(self): - "Return the column number where the current event ends." - return -1 - - def getLineNumber(self): - "Return the line number where the current event ends." - return -1 - - def getPublicId(self): - "Return the public identifier for the current event." - return None - - def getSystemId(self): - "Return the system identifier for the current event." - return None - -# ===== INPUTSOURCE ===== - -class InputSource: - """Encapsulation of the information needed by the XMLReader to - read entities. - - This class may include information about the public identifier, - system identifier, byte stream (possibly with character encoding - information) and/or the character stream of an entity. - - Applications will create objects of this class for use in the - XMLReader.parse method and for returning from - EntityResolver.resolveEntity. - - An InputSource belongs to the application, the XMLReader is not - allowed to modify InputSource objects passed to it from the - application, although it may make copies and modify those.""" - - def __init__(self, system_id = None): - self.__system_id = system_id - self.__public_id = None - self.__encoding = None - self.__bytefile = None - self.__charfile = None - - def setPublicId(self, public_id): - "Sets the public identifier of this InputSource." - self.__public_id = public_id - - def getPublicId(self): - "Returns the public identifier of this InputSource." - return self.__public_id - - def setSystemId(self, system_id): - "Sets the system identifier of this InputSource." - self.__system_id = system_id - - def getSystemId(self): - "Returns the system identifier of this InputSource." - return self.__system_id - - def setEncoding(self, encoding): - """Sets the character encoding of this InputSource. - - The encoding must be a string acceptable for an XML encoding - declaration (see section 4.3.3 of the XML recommendation). - - The encoding attribute of the InputSource is ignored if the - InputSource also contains a character stream.""" - self.__encoding = encoding - - def getEncoding(self): - "Get the character encoding of this InputSource." - return self.__encoding - - def setByteStream(self, bytefile): - """Set the byte stream (a Python file-like object which does - not perform byte-to-character conversion) for this input - source. - - The SAX parser will ignore this if there is also a character - stream specified, but it will use a byte stream in preference - to opening a URI connection itself. - - If the application knows the character encoding of the byte - stream, it should set it with the setEncoding method.""" - self.__bytefile = bytefile - - def getByteStream(self): - """Get the byte stream for this input source. - - The getEncoding method will return the character encoding for - this byte stream, or None if unknown.""" - return self.__bytefile - - def setCharacterStream(self, charfile): - """Set the character stream for this input source. (The stream - must be a Python 2.0 Unicode-wrapped file-like that performs - conversion to Unicode strings.) - - If there is a character stream specified, the SAX parser will - ignore any byte stream and will not attempt to open a URI - connection to the system identifier.""" - self.__charfile = charfile - - def getCharacterStream(self): - "Get the character stream for this input source." - return self.__charfile - -# ===== ATTRIBUTESIMPL ===== - -class AttributesImpl: - - def __init__(self, attrs): - """Non-NS-aware implementation. - - attrs should be of the form {name : value}.""" - self._attrs = attrs - - def getLength(self): - return len(self._attrs) - - def getType(self, name): - return "CDATA" - - def getValue(self, name): - return self._attrs[name] - - def getValueByQName(self, name): - return self._attrs[name] - - def getNameByQName(self, name): - if not self._attrs.has_key(name): - raise KeyError, name - return name - - def getQNameByName(self, name): - if not self._attrs.has_key(name): - raise KeyError, name - return name - - def getNames(self): - return self._attrs.keys() - - def getQNames(self): - return self._attrs.keys() - - def __len__(self): - return len(self._attrs) - - def __getitem__(self, name): - return self._attrs[name] - - def keys(self): - return self._attrs.keys() - - def has_key(self, name): - return self._attrs.has_key(name) - - def __contains__(self, name): - return self._attrs.has_key(name) - - def get(self, name, alternative=None): - return self._attrs.get(name, alternative) - - def copy(self): - return self.__class__(self._attrs) - - def items(self): - return self._attrs.items() - - def values(self): - return self._attrs.values() - -# ===== ATTRIBUTESNSIMPL ===== - -class AttributesNSImpl(AttributesImpl): - - def __init__(self, attrs, qnames): - """NS-aware implementation. - - attrs should be of the form {(ns_uri, lname): value, ...}. - qnames of the form {(ns_uri, lname): qname, ...}.""" - self._attrs = attrs - self._qnames = qnames - - def getValueByQName(self, name): - for (nsname, qname) in self._qnames.items(): - if qname == name: - return self._attrs[nsname] - - raise KeyError, name - - def getNameByQName(self, name): - for (nsname, qname) in self._qnames.items(): - if qname == name: - return nsname - - raise KeyError, name - - def getQNameByName(self, name): - return self._qnames[name] - - def getQNames(self): - return self._qnames.values() - - def copy(self): - return self.__class__(self._attrs, self._qnames) - - -def _test(): - XMLReader() - IncrementalParser() - Locator() - -if __name__ == "__main__": - _test() diff --git a/Lib/xmlcore/dom/NodeFilter.py b/Lib/xmlcore/dom/NodeFilter.py new file mode 100644 index 0000000..fc05245 --- /dev/null +++ b/Lib/xmlcore/dom/NodeFilter.py @@ -0,0 +1,27 @@ +# This is the Python mapping for interface NodeFilter from +# DOM2-Traversal-Range. It contains only constants. + +class NodeFilter: + """ + This is the DOM2 NodeFilter interface. It contains only constants. + """ + FILTER_ACCEPT = 1 + FILTER_REJECT = 2 + FILTER_SKIP = 3 + + SHOW_ALL = 0xFFFFFFFFL + SHOW_ELEMENT = 0x00000001 + SHOW_ATTRIBUTE = 0x00000002 + SHOW_TEXT = 0x00000004 + SHOW_CDATA_SECTION = 0x00000008 + SHOW_ENTITY_REFERENCE = 0x00000010 + SHOW_ENTITY = 0x00000020 + SHOW_PROCESSING_INSTRUCTION = 0x00000040 + SHOW_COMMENT = 0x00000080 + SHOW_DOCUMENT = 0x00000100 + SHOW_DOCUMENT_TYPE = 0x00000200 + SHOW_DOCUMENT_FRAGMENT = 0x00000400 + SHOW_NOTATION = 0x00000800 + + def acceptNode(self, node): + raise NotImplementedError diff --git a/Lib/xmlcore/dom/__init__.py b/Lib/xmlcore/dom/__init__.py new file mode 100644 index 0000000..6363d00 --- /dev/null +++ b/Lib/xmlcore/dom/__init__.py @@ -0,0 +1,139 @@ +"""W3C Document Object Model implementation for Python. + +The Python mapping of the Document Object Model is documented in the +Python Library Reference in the section on the xml.dom package. + +This package contains the following modules: + +minidom -- A simple implementation of the Level 1 DOM with namespace + support added (based on the Level 2 specification) and other + minor Level 2 functionality. + +pulldom -- DOM builder supporting on-demand tree-building for selected + subtrees of the document. + +""" + + +class Node: + """Class giving the NodeType constants.""" + + # DOM implementations may use this as a base class for their own + # Node implementations. If they don't, the constants defined here + # should still be used as the canonical definitions as they match + # the values given in the W3C recommendation. Client code can + # safely refer to these values in all tests of Node.nodeType + # values. + + ELEMENT_NODE = 1 + ATTRIBUTE_NODE = 2 + TEXT_NODE = 3 + CDATA_SECTION_NODE = 4 + ENTITY_REFERENCE_NODE = 5 + ENTITY_NODE = 6 + PROCESSING_INSTRUCTION_NODE = 7 + COMMENT_NODE = 8 + DOCUMENT_NODE = 9 + DOCUMENT_TYPE_NODE = 10 + DOCUMENT_FRAGMENT_NODE = 11 + NOTATION_NODE = 12 + + +#ExceptionCode +INDEX_SIZE_ERR = 1 +DOMSTRING_SIZE_ERR = 2 +HIERARCHY_REQUEST_ERR = 3 +WRONG_DOCUMENT_ERR = 4 +INVALID_CHARACTER_ERR = 5 +NO_DATA_ALLOWED_ERR = 6 +NO_MODIFICATION_ALLOWED_ERR = 7 +NOT_FOUND_ERR = 8 +NOT_SUPPORTED_ERR = 9 +INUSE_ATTRIBUTE_ERR = 10 +INVALID_STATE_ERR = 11 +SYNTAX_ERR = 12 +INVALID_MODIFICATION_ERR = 13 +NAMESPACE_ERR = 14 +INVALID_ACCESS_ERR = 15 +VALIDATION_ERR = 16 + + +class DOMException(Exception): + """Abstract base class for DOM exceptions. + Exceptions with specific codes are specializations of this class.""" + + def __init__(self, *args, **kw): + if self.__class__ is DOMException: + raise RuntimeError( + "DOMException should not be instantiated directly") + Exception.__init__(self, *args, **kw) + + def _get_code(self): + return self.code + + +class IndexSizeErr(DOMException): + code = INDEX_SIZE_ERR + +class DomstringSizeErr(DOMException): + code = DOMSTRING_SIZE_ERR + +class HierarchyRequestErr(DOMException): + code = HIERARCHY_REQUEST_ERR + +class WrongDocumentErr(DOMException): + code = WRONG_DOCUMENT_ERR + +class InvalidCharacterErr(DOMException): + code = INVALID_CHARACTER_ERR + +class NoDataAllowedErr(DOMException): + code = NO_DATA_ALLOWED_ERR + +class NoModificationAllowedErr(DOMException): + code = NO_MODIFICATION_ALLOWED_ERR + +class NotFoundErr(DOMException): + code = NOT_FOUND_ERR + +class NotSupportedErr(DOMException): + code = NOT_SUPPORTED_ERR + +class InuseAttributeErr(DOMException): + code = INUSE_ATTRIBUTE_ERR + +class InvalidStateErr(DOMException): + code = INVALID_STATE_ERR + +class SyntaxErr(DOMException): + code = SYNTAX_ERR + +class InvalidModificationErr(DOMException): + code = INVALID_MODIFICATION_ERR + +class NamespaceErr(DOMException): + code = NAMESPACE_ERR + +class InvalidAccessErr(DOMException): + code = INVALID_ACCESS_ERR + +class ValidationErr(DOMException): + code = VALIDATION_ERR + +class UserDataHandler: + """Class giving the operation constants for UserDataHandler.handle().""" + + # Based on DOM Level 3 (WD 9 April 2002) + + NODE_CLONED = 1 + NODE_IMPORTED = 2 + NODE_DELETED = 3 + NODE_RENAMED = 4 + +XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" +XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" +XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" +EMPTY_NAMESPACE = None +EMPTY_PREFIX = None + +from domreg import getDOMImplementation,registerDOMImplementation diff --git a/Lib/xmlcore/dom/domreg.py b/Lib/xmlcore/dom/domreg.py new file mode 100644 index 0000000..d60ed64 --- /dev/null +++ b/Lib/xmlcore/dom/domreg.py @@ -0,0 +1,99 @@ +"""Registration facilities for DOM. This module should not be used +directly. Instead, the functions getDOMImplementation and +registerDOMImplementation should be imported from xml.dom.""" + +from xmlcore.dom.minicompat import * # isinstance, StringTypes + +# This is a list of well-known implementations. Well-known names +# should be published by posting to xml-sig@python.org, and are +# subsequently recorded in this file. + +well_known_implementations = { + 'minidom':'xml.dom.minidom', + '4DOM': 'xml.dom.DOMImplementation', + } + +# DOM implementations not officially registered should register +# themselves with their + +registered = {} + +def registerDOMImplementation(name, factory): + """registerDOMImplementation(name, factory) + + Register the factory function with the name. The factory function + should return an object which implements the DOMImplementation + interface. The factory function can either return the same object, + or a new one (e.g. if that implementation supports some + customization).""" + + registered[name] = factory + +def _good_enough(dom, features): + "_good_enough(dom, features) -> Return 1 if the dom offers the features" + for f,v in features: + if not dom.hasFeature(f,v): + return 0 + return 1 + +def getDOMImplementation(name = None, features = ()): + """getDOMImplementation(name = None, features = ()) -> DOM implementation. + + Return a suitable DOM implementation. The name is either + well-known, the module name of a DOM implementation, or None. If + it is not None, imports the corresponding module and returns + DOMImplementation object if the import succeeds. + + If name is not given, consider the available implementations to + find one with the required feature set. If no implementation can + be found, raise an ImportError. The features list must be a sequence + of (feature, version) pairs which are passed to hasFeature.""" + + import os + creator = None + mod = well_known_implementations.get(name) + if mod: + mod = __import__(mod, {}, {}, ['getDOMImplementation']) + return mod.getDOMImplementation() + elif name: + return registered[name]() + elif os.environ.has_key("PYTHON_DOM"): + return getDOMImplementation(name = os.environ["PYTHON_DOM"]) + + # User did not specify a name, try implementations in arbitrary + # order, returning the one that has the required features + if isinstance(features, StringTypes): + features = _parse_feature_string(features) + for creator in registered.values(): + dom = creator() + if _good_enough(dom, features): + return dom + + for creator in well_known_implementations.keys(): + try: + dom = getDOMImplementation(name = creator) + except StandardError: # typically ImportError, or AttributeError + continue + if _good_enough(dom, features): + return dom + + raise ImportError,"no suitable DOM implementation found" + +def _parse_feature_string(s): + features = [] + parts = s.split() + i = 0 + length = len(parts) + while i < length: + feature = parts[i] + if feature[0] in "0123456789": + raise ValueError, "bad feature name: %r" % (feature,) + i = i + 1 + version = None + if i < length: + v = parts[i] + if v[0] in "0123456789": + i = i + 1 + version = v + features.append((feature, version)) + return tuple(features) diff --git a/Lib/xmlcore/dom/expatbuilder.py b/Lib/xmlcore/dom/expatbuilder.py new file mode 100644 index 0000000..81d9c2b --- /dev/null +++ b/Lib/xmlcore/dom/expatbuilder.py @@ -0,0 +1,983 @@ +"""Facility to use the Expat parser to load a minidom instance +from a string or file. + +This avoids all the overhead of SAX and pulldom to gain performance. +""" + +# Warning! +# +# This module is tightly bound to the implementation details of the +# minidom DOM and can't be used with other DOM implementations. This +# is due, in part, to a lack of appropriate methods in the DOM (there is +# no way to create Entity and Notation nodes via the DOM Level 2 +# interface), and for performance. The later is the cause of some fairly +# cryptic code. +# +# Performance hacks: +# +# - .character_data_handler() has an extra case in which continuing +# data is appended to an existing Text node; this can be a +# speedup since pyexpat can break up character data into multiple +# callbacks even though we set the buffer_text attribute on the +# parser. This also gives us the advantage that we don't need a +# separate normalization pass. +# +# - Determining that a node exists is done using an identity comparison +# with None rather than a truth test; this avoids searching for and +# calling any methods on the node object if it exists. (A rather +# nice speedup is achieved this way as well!) + +from xmlcore.dom import xmlbuilder, minidom, Node +from xmlcore.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE +from xmlcore.parsers import expat +from xmlcore.dom.minidom import _append_child, _set_attribute_node +from xmlcore.dom.NodeFilter import NodeFilter + +from xmlcore.dom.minicompat import * + +TEXT_NODE = Node.TEXT_NODE +CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE +DOCUMENT_NODE = Node.DOCUMENT_NODE + +FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT +FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT +FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP +FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT + +theDOMImplementation = minidom.getDOMImplementation() + +# Expat typename -> TypeInfo +_typeinfo_map = { + "CDATA": minidom.TypeInfo(None, "cdata"), + "ENUM": minidom.TypeInfo(None, "enumeration"), + "ENTITY": minidom.TypeInfo(None, "entity"), + "ENTITIES": minidom.TypeInfo(None, "entities"), + "ID": minidom.TypeInfo(None, "id"), + "IDREF": minidom.TypeInfo(None, "idref"), + "IDREFS": minidom.TypeInfo(None, "idrefs"), + "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), + "NMTOKENS": minidom.TypeInfo(None, "nmtokens"), + } + +class ElementInfo(NewStyle): + __slots__ = '_attr_info', '_model', 'tagName' + + def __init__(self, tagName, model=None): + self.tagName = tagName + self._attr_info = [] + self._model = model + + def __getstate__(self): + return self._attr_info, self._model, self.tagName + + def __setstate__(self, state): + self._attr_info, self._model, self.tagName = state + + def getAttributeType(self, aname): + for info in self._attr_info: + if info[1] == aname: + t = info[-2] + if t[0] == "(": + return _typeinfo_map["ENUM"] + else: + return _typeinfo_map[info[-2]] + return minidom._no_type + + def getAttributeTypeNS(self, namespaceURI, localName): + return minidom._no_type + + def isElementContent(self): + if self._model: + type = self._model[0] + return type not in (expat.model.XML_CTYPE_ANY, + expat.model.XML_CTYPE_MIXED) + else: + return False + + def isEmpty(self): + if self._model: + return self._model[0] == expat.model.XML_CTYPE_EMPTY + else: + return False + + def isId(self, aname): + for info in self._attr_info: + if info[1] == aname: + return info[-2] == "ID" + return False + + def isIdNS(self, euri, ename, auri, aname): + # not sure this is meaningful + return self.isId((auri, aname)) + +def _intern(builder, s): + return builder._intern_setdefault(s, s) + +def _parse_ns_name(builder, name): + assert ' ' in name + parts = name.split(' ') + intern = builder._intern_setdefault + if len(parts) == 3: + uri, localname, prefix = parts + prefix = intern(prefix, prefix) + qname = "%s:%s" % (prefix, localname) + qname = intern(qname, qname) + localname = intern(localname, localname) + else: + uri, localname = parts + prefix = EMPTY_PREFIX + qname = localname = intern(localname, localname) + return intern(uri, uri), localname, prefix, qname + + +class ExpatBuilder: + """Document builder that uses Expat to build a ParsedXML.DOM document + instance.""" + + def __init__(self, options=None): + if options is None: + options = xmlbuilder.Options() + self._options = options + if self._options.filter is not None: + self._filter = FilterVisibilityController(self._options.filter) + else: + self._filter = None + # This *really* doesn't do anything in this case, so + # override it with something fast & minimal. + self._finish_start_element = id + self._parser = None + self.reset() + + def createParser(self): + """Create a new parser object.""" + return expat.ParserCreate() + + def getParser(self): + """Return the parser object, creating a new one if needed.""" + if not self._parser: + self._parser = self.createParser() + self._intern_setdefault = self._parser.intern.setdefault + self._parser.buffer_text = True + self._parser.ordered_attributes = True + self._parser.specified_attributes = True + self.install(self._parser) + return self._parser + + def reset(self): + """Free all data structures used during DOM construction.""" + self.document = theDOMImplementation.createDocument( + EMPTY_NAMESPACE, None, None) + self.curNode = self.document + self._elem_info = self.document._elem_info + self._cdata = False + + def install(self, parser): + """Install the callbacks needed to build the DOM into the parser.""" + # This creates circular references! + parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler + parser.StartElementHandler = self.first_element_handler + parser.EndElementHandler = self.end_element_handler + parser.ProcessingInstructionHandler = self.pi_handler + if self._options.entities: + parser.EntityDeclHandler = self.entity_decl_handler + parser.NotationDeclHandler = self.notation_decl_handler + if self._options.comments: + parser.CommentHandler = self.comment_handler + if self._options.cdata_sections: + parser.StartCdataSectionHandler = self.start_cdata_section_handler + parser.EndCdataSectionHandler = self.end_cdata_section_handler + parser.CharacterDataHandler = self.character_data_handler_cdata + else: + parser.CharacterDataHandler = self.character_data_handler + parser.ExternalEntityRefHandler = self.external_entity_ref_handler + parser.XmlDeclHandler = self.xml_decl_handler + parser.ElementDeclHandler = self.element_decl_handler + parser.AttlistDeclHandler = self.attlist_decl_handler + + def parseFile(self, file): + """Parse a document from a file object, returning the document + node.""" + parser = self.getParser() + first_buffer = True + try: + while 1: + buffer = file.read(16*1024) + if not buffer: + break + parser.Parse(buffer, 0) + if first_buffer and self.document.documentElement: + self._setup_subset(buffer) + first_buffer = False + parser.Parse("", True) + except ParseEscape: + pass + doc = self.document + self.reset() + self._parser = None + return doc + + def parseString(self, string): + """Parse a document from a string, returning the document node.""" + parser = self.getParser() + try: + parser.Parse(string, True) + self._setup_subset(string) + except ParseEscape: + pass + doc = self.document + self.reset() + self._parser = None + return doc + + def _setup_subset(self, buffer): + """Load the internal subset if there might be one.""" + if self.document.doctype: + extractor = InternalSubsetExtractor() + extractor.parseString(buffer) + subset = extractor.getSubset() + self.document.doctype.internalSubset = subset + + def start_doctype_decl_handler(self, doctypeName, systemId, publicId, + has_internal_subset): + doctype = self.document.implementation.createDocumentType( + doctypeName, publicId, systemId) + doctype.ownerDocument = self.document + self.document.childNodes.append(doctype) + self.document.doctype = doctype + if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: + self.document.doctype = None + del self.document.childNodes[-1] + doctype = None + self._parser.EntityDeclHandler = None + self._parser.NotationDeclHandler = None + if has_internal_subset: + if doctype is not None: + doctype.entities._seq = [] + doctype.notations._seq = [] + self._parser.CommentHandler = None + self._parser.ProcessingInstructionHandler = None + self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler + + def end_doctype_decl_handler(self): + if self._options.comments: + self._parser.CommentHandler = self.comment_handler + self._parser.ProcessingInstructionHandler = self.pi_handler + if not (self._elem_info or self._filter): + self._finish_end_element = id + + def pi_handler(self, target, data): + node = self.document.createProcessingInstruction(target, data) + _append_child(self.curNode, node) + if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: + self.curNode.removeChild(node) + + def character_data_handler_cdata(self, data): + childNodes = self.curNode.childNodes + if self._cdata: + if ( self._cdata_continue + and childNodes[-1].nodeType == CDATA_SECTION_NODE): + childNodes[-1].appendData(data) + return + node = self.document.createCDATASection(data) + self._cdata_continue = True + elif childNodes and childNodes[-1].nodeType == TEXT_NODE: + node = childNodes[-1] + value = node.data + data + d = node.__dict__ + d['data'] = d['nodeValue'] = value + return + else: + node = minidom.Text() + d = node.__dict__ + d['data'] = d['nodeValue'] = data + d['ownerDocument'] = self.document + _append_child(self.curNode, node) + + def character_data_handler(self, data): + childNodes = self.curNode.childNodes + if childNodes and childNodes[-1].nodeType == TEXT_NODE: + node = childNodes[-1] + d = node.__dict__ + d['data'] = d['nodeValue'] = node.data + data + return + node = minidom.Text() + d = node.__dict__ + d['data'] = d['nodeValue'] = node.data + data + d['ownerDocument'] = self.document + _append_child(self.curNode, node) + + def entity_decl_handler(self, entityName, is_parameter_entity, value, + base, systemId, publicId, notationName): + if is_parameter_entity: + # we don't care about parameter entities for the DOM + return + if not self._options.entities: + return + node = self.document._create_entity(entityName, publicId, + systemId, notationName) + if value is not None: + # internal entity + # node *should* be readonly, but we'll cheat + child = self.document.createTextNode(value) + node.childNodes.append(child) + self.document.doctype.entities._seq.append(node) + if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: + del self.document.doctype.entities._seq[-1] + + def notation_decl_handler(self, notationName, base, systemId, publicId): + node = self.document._create_notation(notationName, publicId, systemId) + self.document.doctype.notations._seq.append(node) + if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: + del self.document.doctype.notations._seq[-1] + + def comment_handler(self, data): + node = self.document.createComment(data) + _append_child(self.curNode, node) + if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: + self.curNode.removeChild(node) + + def start_cdata_section_handler(self): + self._cdata = True + self._cdata_continue = False + + def end_cdata_section_handler(self): + self._cdata = False + self._cdata_continue = False + + def external_entity_ref_handler(self, context, base, systemId, publicId): + return 1 + + def first_element_handler(self, name, attributes): + if self._filter is None and not self._elem_info: + self._finish_end_element = id + self.getParser().StartElementHandler = self.start_element_handler + self.start_element_handler(name, attributes) + + def start_element_handler(self, name, attributes): + node = self.document.createElement(name) + _append_child(self.curNode, node) + self.curNode = node + + if attributes: + for i in range(0, len(attributes), 2): + a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, + None, EMPTY_PREFIX) + value = attributes[i+1] + d = a.childNodes[0].__dict__ + d['data'] = d['nodeValue'] = value + d = a.__dict__ + d['value'] = d['nodeValue'] = value + d['ownerDocument'] = self.document + _set_attribute_node(node, a) + + if node is not self.document.documentElement: + self._finish_start_element(node) + + def _finish_start_element(self, node): + if self._filter: + # To be general, we'd have to call isSameNode(), but this + # is sufficient for minidom: + if node is self.document.documentElement: + return + filt = self._filter.startContainer(node) + if filt == FILTER_REJECT: + # ignore this node & all descendents + Rejecter(self) + elif filt == FILTER_SKIP: + # ignore this node, but make it's children become + # children of the parent node + Skipper(self) + else: + return + self.curNode = node.parentNode + node.parentNode.removeChild(node) + node.unlink() + + # If this ever changes, Namespaces.end_element_handler() needs to + # be changed to match. + # + def end_element_handler(self, name): + curNode = self.curNode + self.curNode = curNode.parentNode + self._finish_end_element(curNode) + + def _finish_end_element(self, curNode): + info = self._elem_info.get(curNode.tagName) + if info: + self._handle_white_text_nodes(curNode, info) + if self._filter: + if curNode is self.document.documentElement: + return + if self._filter.acceptNode(curNode) == FILTER_REJECT: + self.curNode.removeChild(curNode) + curNode.unlink() + + def _handle_white_text_nodes(self, node, info): + if (self._options.whitespace_in_element_content + or not info.isElementContent()): + return + + # We have element type information and should remove ignorable + # whitespace; identify for text nodes which contain only + # whitespace. + L = [] + for child in node.childNodes: + if child.nodeType == TEXT_NODE and not child.data.strip(): + L.append(child) + + # Remove ignorable whitespace from the tree. + for child in L: + node.removeChild(child) + + def element_decl_handler(self, name, model): + info = self._elem_info.get(name) + if info is None: + self._elem_info[name] = ElementInfo(name, model) + else: + assert info._model is None + info._model = model + + def attlist_decl_handler(self, elem, name, type, default, required): + info = self._elem_info.get(elem) + if info is None: + info = ElementInfo(elem) + self._elem_info[elem] = info + info._attr_info.append( + [None, name, None, None, default, 0, type, required]) + + def xml_decl_handler(self, version, encoding, standalone): + self.document.version = version + self.document.encoding = encoding + # This is still a little ugly, thanks to the pyexpat API. ;-( + if standalone >= 0: + if standalone: + self.document.standalone = True + else: + self.document.standalone = False + + +# Don't include FILTER_INTERRUPT, since that's checked separately +# where allowed. +_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) + +class FilterVisibilityController(NewStyle): + """Wrapper around a DOMBuilderFilter which implements the checks + to make the whatToShow filter attribute work.""" + + __slots__ = 'filter', + + def __init__(self, filter): + self.filter = filter + + def startContainer(self, node): + mask = self._nodetype_mask[node.nodeType] + if self.filter.whatToShow & mask: + val = self.filter.startContainer(node) + if val == FILTER_INTERRUPT: + raise ParseEscape + if val not in _ALLOWED_FILTER_RETURNS: + raise ValueError, \ + "startContainer() returned illegal value: " + repr(val) + return val + else: + return FILTER_ACCEPT + + def acceptNode(self, node): + mask = self._nodetype_mask[node.nodeType] + if self.filter.whatToShow & mask: + val = self.filter.acceptNode(node) + if val == FILTER_INTERRUPT: + raise ParseEscape + if val == FILTER_SKIP: + # move all child nodes to the parent, and remove this node + parent = node.parentNode + for child in node.childNodes[:]: + parent.appendChild(child) + # node is handled by the caller + return FILTER_REJECT + if val not in _ALLOWED_FILTER_RETURNS: + raise ValueError, \ + "acceptNode() returned illegal value: " + repr(val) + return val + else: + return FILTER_ACCEPT + + _nodetype_mask = { + Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, + Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, + Node.TEXT_NODE: NodeFilter.SHOW_TEXT, + Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, + Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, + Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, + Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, + Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, + Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, + Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, + Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, + Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, + } + + +class FilterCrutch(NewStyle): + __slots__ = '_builder', '_level', '_old_start', '_old_end' + + def __init__(self, builder): + self._level = 0 + self._builder = builder + parser = builder._parser + self._old_start = parser.StartElementHandler + self._old_end = parser.EndElementHandler + parser.StartElementHandler = self.start_element_handler + parser.EndElementHandler = self.end_element_handler + +class Rejecter(FilterCrutch): + __slots__ = () + + def __init__(self, builder): + FilterCrutch.__init__(self, builder) + parser = builder._parser + for name in ("ProcessingInstructionHandler", + "CommentHandler", + "CharacterDataHandler", + "StartCdataSectionHandler", + "EndCdataSectionHandler", + "ExternalEntityRefHandler", + ): + setattr(parser, name, None) + + def start_element_handler(self, *args): + self._level = self._level + 1 + + def end_element_handler(self, *args): + if self._level == 0: + # restore the old handlers + parser = self._builder._parser + self._builder.install(parser) + parser.StartElementHandler = self._old_start + parser.EndElementHandler = self._old_end + else: + self._level = self._level - 1 + +class Skipper(FilterCrutch): + __slots__ = () + + def start_element_handler(self, *args): + node = self._builder.curNode + self._old_start(*args) + if self._builder.curNode is not node: + self._level = self._level + 1 + + def end_element_handler(self, *args): + if self._level == 0: + # We're popping back out of the node we're skipping, so we + # shouldn't need to do anything but reset the handlers. + self._builder._parser.StartElementHandler = self._old_start + self._builder._parser.EndElementHandler = self._old_end + self._builder = None + else: + self._level = self._level - 1 + self._old_end(*args) + + +# framework document used by the fragment builder. +# Takes a string for the doctype, subset string, and namespace attrs string. + +_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ + "http://xml.python.org/entities/fragment-builder/internal" + +_FRAGMENT_BUILDER_TEMPLATE = ( + '''\ + +%%s +]> +&fragment-builder-internal;''' + % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) + + +class FragmentBuilder(ExpatBuilder): + """Builder which constructs document fragments given XML source + text and a context node. + + The context node is expected to provide information about the + namespace declarations which are in scope at the start of the + fragment. + """ + + def __init__(self, context, options=None): + if context.nodeType == DOCUMENT_NODE: + self.originalDocument = context + self.context = context + else: + self.originalDocument = context.ownerDocument + self.context = context + ExpatBuilder.__init__(self, options) + + def reset(self): + ExpatBuilder.reset(self) + self.fragment = None + + def parseFile(self, file): + """Parse a document fragment from a file object, returning the + fragment node.""" + return self.parseString(file.read()) + + def parseString(self, string): + """Parse a document fragment from a string, returning the + fragment node.""" + self._source = string + parser = self.getParser() + doctype = self.originalDocument.doctype + ident = "" + if doctype: + subset = doctype.internalSubset or self._getDeclarations() + if doctype.publicId: + ident = ('PUBLIC "%s" "%s"' + % (doctype.publicId, doctype.systemId)) + elif doctype.systemId: + ident = 'SYSTEM "%s"' % doctype.systemId + else: + subset = "" + nsattrs = self._getNSattrs() # get ns decls from node's ancestors + document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) + try: + parser.Parse(document, 1) + except: + self.reset() + raise + fragment = self.fragment + self.reset() +## self._parser = None + return fragment + + def _getDeclarations(self): + """Re-create the internal subset from the DocumentType node. + + This is only needed if we don't already have the + internalSubset as a string. + """ + doctype = self.context.ownerDocument.doctype + s = "" + if doctype: + for i in range(doctype.notations.length): + notation = doctype.notations.item(i) + if s: + s = s + "\n " + s = "%s' \ + % (s, notation.publicId, notation.systemId) + else: + s = '%s SYSTEM "%s">' % (s, notation.systemId) + for i in range(doctype.entities.length): + entity = doctype.entities.item(i) + if s: + s = s + "\n " + s = "%s" + return s + + def _getNSattrs(self): + return "" + + def external_entity_ref_handler(self, context, base, systemId, publicId): + if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: + # this entref is the one that we made to put the subtree + # in; all of our given input is parsed in here. + old_document = self.document + old_cur_node = self.curNode + parser = self._parser.ExternalEntityParserCreate(context) + # put the real document back, parse into the fragment to return + self.document = self.originalDocument + self.fragment = self.document.createDocumentFragment() + self.curNode = self.fragment + try: + parser.Parse(self._source, 1) + finally: + self.curNode = old_cur_node + self.document = old_document + self._source = None + return -1 + else: + return ExpatBuilder.external_entity_ref_handler( + self, context, base, systemId, publicId) + + +class Namespaces: + """Mix-in class for builders; adds support for namespaces.""" + + def _initNamespaces(self): + # list of (prefix, uri) ns declarations. Namespace attrs are + # constructed from this and added to the element's attrs. + self._ns_ordered_prefixes = [] + + def createParser(self): + """Create a new namespace-handling parser.""" + parser = expat.ParserCreate(namespace_separator=" ") + parser.namespace_prefixes = True + return parser + + def install(self, parser): + """Insert the namespace-handlers onto the parser.""" + ExpatBuilder.install(self, parser) + if self._options.namespace_declarations: + parser.StartNamespaceDeclHandler = ( + self.start_namespace_decl_handler) + + def start_namespace_decl_handler(self, prefix, uri): + """Push this namespace declaration on our storage.""" + self._ns_ordered_prefixes.append((prefix, uri)) + + def start_element_handler(self, name, attributes): + if ' ' in name: + uri, localname, prefix, qname = _parse_ns_name(self, name) + else: + uri = EMPTY_NAMESPACE + qname = name + localname = None + prefix = EMPTY_PREFIX + node = minidom.Element(qname, uri, prefix, localname) + node.ownerDocument = self.document + _append_child(self.curNode, node) + self.curNode = node + + if self._ns_ordered_prefixes: + for prefix, uri in self._ns_ordered_prefixes: + if prefix: + a = minidom.Attr(_intern(self, 'xmlns:' + prefix), + XMLNS_NAMESPACE, prefix, "xmlns") + else: + a = minidom.Attr("xmlns", XMLNS_NAMESPACE, + "xmlns", EMPTY_PREFIX) + d = a.childNodes[0].__dict__ + d['data'] = d['nodeValue'] = uri + d = a.__dict__ + d['value'] = d['nodeValue'] = uri + d['ownerDocument'] = self.document + _set_attribute_node(node, a) + del self._ns_ordered_prefixes[:] + + if attributes: + _attrs = node._attrs + _attrsNS = node._attrsNS + for i in range(0, len(attributes), 2): + aname = attributes[i] + value = attributes[i+1] + if ' ' in aname: + uri, localname, prefix, qname = _parse_ns_name(self, aname) + a = minidom.Attr(qname, uri, localname, prefix) + _attrs[qname] = a + _attrsNS[(uri, localname)] = a + else: + a = minidom.Attr(aname, EMPTY_NAMESPACE, + aname, EMPTY_PREFIX) + _attrs[aname] = a + _attrsNS[(EMPTY_NAMESPACE, aname)] = a + d = a.childNodes[0].__dict__ + d['data'] = d['nodeValue'] = value + d = a.__dict__ + d['ownerDocument'] = self.document + d['value'] = d['nodeValue'] = value + d['ownerElement'] = node + + if __debug__: + # This only adds some asserts to the original + # end_element_handler(), so we only define this when -O is not + # used. If changing one, be sure to check the other to see if + # it needs to be changed as well. + # + def end_element_handler(self, name): + curNode = self.curNode + if ' ' in name: + uri, localname, prefix, qname = _parse_ns_name(self, name) + assert (curNode.namespaceURI == uri + and curNode.localName == localname + and curNode.prefix == prefix), \ + "element stack messed up! (namespace)" + else: + assert curNode.nodeName == name, \ + "element stack messed up - bad nodeName" + assert curNode.namespaceURI == EMPTY_NAMESPACE, \ + "element stack messed up - bad namespaceURI" + self.curNode = curNode.parentNode + self._finish_end_element(curNode) + + +class ExpatBuilderNS(Namespaces, ExpatBuilder): + """Document builder that supports namespaces.""" + + def reset(self): + ExpatBuilder.reset(self) + self._initNamespaces() + + +class FragmentBuilderNS(Namespaces, FragmentBuilder): + """Fragment builder that supports namespaces.""" + + def reset(self): + FragmentBuilder.reset(self) + self._initNamespaces() + + def _getNSattrs(self): + """Return string of namespace attributes from this element and + ancestors.""" + # XXX This needs to be re-written to walk the ancestors of the + # context to build up the namespace information from + # declarations, elements, and attributes found in context. + # Otherwise we have to store a bunch more data on the DOM + # (though that *might* be more reliable -- not clear). + attrs = "" + context = self.context + L = [] + while context: + if hasattr(context, '_ns_prefix_uri'): + for prefix, uri in context._ns_prefix_uri.items(): + # add every new NS decl from context to L and attrs string + if prefix in L: + continue + L.append(prefix) + if prefix: + declname = "xmlns:" + prefix + else: + declname = "xmlns" + if attrs: + attrs = "%s\n %s='%s'" % (attrs, declname, uri) + else: + attrs = " %s='%s'" % (declname, uri) + context = context.parentNode + return attrs + + +class ParseEscape(Exception): + """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" + pass + +class InternalSubsetExtractor(ExpatBuilder): + """XML processor which can rip out the internal document type subset.""" + + subset = None + + def getSubset(self): + """Return the internal subset as a string.""" + return self.subset + + def parseFile(self, file): + try: + ExpatBuilder.parseFile(self, file) + except ParseEscape: + pass + + def parseString(self, string): + try: + ExpatBuilder.parseString(self, string) + except ParseEscape: + pass + + def install(self, parser): + parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler + parser.StartElementHandler = self.start_element_handler + + def start_doctype_decl_handler(self, name, publicId, systemId, + has_internal_subset): + if has_internal_subset: + parser = self.getParser() + self.subset = [] + parser.DefaultHandler = self.subset.append + parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler + else: + raise ParseEscape() + + def end_doctype_decl_handler(self): + s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') + self.subset = s + raise ParseEscape() + + def start_element_handler(self, name, attrs): + raise ParseEscape() + + +def parse(file, namespaces=1): + """Parse a document, returning the resulting Document node. + + 'file' may be either a file name or an open file object. + """ + if namespaces: + builder = ExpatBuilderNS() + else: + builder = ExpatBuilder() + + if isinstance(file, StringTypes): + fp = open(file, 'rb') + try: + result = builder.parseFile(fp) + finally: + fp.close() + else: + result = builder.parseFile(file) + return result + + +def parseString(string, namespaces=1): + """Parse a document from a string, returning the resulting + Document node. + """ + if namespaces: + builder = ExpatBuilderNS() + else: + builder = ExpatBuilder() + return builder.parseString(string) + + +def parseFragment(file, context, namespaces=1): + """Parse a fragment of a document, given the context from which it + was originally extracted. context should be the parent of the + node(s) which are in the fragment. + + 'file' may be either a file name or an open file object. + """ + if namespaces: + builder = FragmentBuilderNS(context) + else: + builder = FragmentBuilder(context) + + if isinstance(file, StringTypes): + fp = open(file, 'rb') + try: + result = builder.parseFile(fp) + finally: + fp.close() + else: + result = builder.parseFile(file) + return result + + +def parseFragmentString(string, context, namespaces=1): + """Parse a fragment of a document from a string, given the context + from which it was originally extracted. context should be the + parent of the node(s) which are in the fragment. + """ + if namespaces: + builder = FragmentBuilderNS(context) + else: + builder = FragmentBuilder(context) + return builder.parseString(string) + + +def makeBuilder(options): + """Create a builder based on an Options object.""" + if options.namespaces: + return ExpatBuilderNS(options) + else: + return ExpatBuilder(options) diff --git a/Lib/xmlcore/dom/minicompat.py b/Lib/xmlcore/dom/minicompat.py new file mode 100644 index 0000000..364ca45 --- /dev/null +++ b/Lib/xmlcore/dom/minicompat.py @@ -0,0 +1,184 @@ +"""Python version compatibility support for minidom.""" + +# This module should only be imported using "import *". +# +# The following names are defined: +# +# isinstance -- version of the isinstance() function that accepts +# tuples as the second parameter regardless of the +# Python version +# +# NodeList -- lightest possible NodeList implementation +# +# EmptyNodeList -- lightest possible NodeList that is guarateed to +# remain empty (immutable) +# +# StringTypes -- tuple of defined string types +# +# GetattrMagic -- base class used to make _get_ be magically +# invoked when available +# defproperty -- function used in conjunction with GetattrMagic; +# using these together is needed to make them work +# as efficiently as possible in both Python 2.2+ +# and older versions. For example: +# +# class MyClass(GetattrMagic): +# def _get_myattr(self): +# return something +# +# defproperty(MyClass, "myattr", +# "return some value") +# +# For Python 2.2 and newer, this will construct a +# property object on the class, which avoids +# needing to override __getattr__(). It will only +# work for read-only attributes. +# +# For older versions of Python, inheriting from +# GetattrMagic will use the traditional +# __getattr__() hackery to achieve the same effect, +# but less efficiently. +# +# defproperty() should be used for each version of +# the relevant _get_() function. +# +# NewStyle -- base class to cause __slots__ to be honored in +# the new world +# +# True, False -- only for Python 2.2 and earlier + +__all__ = ["NodeList", "EmptyNodeList", "NewStyle", + "StringTypes", "defproperty", "GetattrMagic"] + +import xmlcore.dom + +try: + unicode +except NameError: + StringTypes = type(''), +else: + StringTypes = type(''), type(unicode('')) + + +# define True and False only if not defined as built-ins +try: + True +except NameError: + True = 1 + False = 0 + __all__.extend(["True", "False"]) + + +try: + isinstance('', StringTypes) +except TypeError: + # + # Wrap isinstance() to make it compatible with the version in + # Python 2.2 and newer. + # + _isinstance = isinstance + def isinstance(obj, type_or_seq): + try: + return _isinstance(obj, type_or_seq) + except TypeError: + for t in type_or_seq: + if _isinstance(obj, t): + return 1 + return 0 + __all__.append("isinstance") + + +if list is type([]): + class NodeList(list): + __slots__ = () + + def item(self, index): + if 0 <= index < len(self): + return self[index] + + def _get_length(self): + return len(self) + + def _set_length(self, value): + raise xmlcore.dom.NoModificationAllowedErr( + "attempt to modify read-only attribute 'length'") + + length = property(_get_length, _set_length, + doc="The number of nodes in the NodeList.") + + def __getstate__(self): + return list(self) + + def __setstate__(self, state): + self[:] = state + + class EmptyNodeList(tuple): + __slots__ = () + + def __add__(self, other): + NL = NodeList() + NL.extend(other) + return NL + + def __radd__(self, other): + NL = NodeList() + NL.extend(other) + return NL + + def item(self, index): + return None + + def _get_length(self): + return 0 + + def _set_length(self, value): + raise xmlcore.dom.NoModificationAllowedErr( + "attempt to modify read-only attribute 'length'") + + length = property(_get_length, _set_length, + doc="The number of nodes in the NodeList.") + +else: + def NodeList(): + return [] + + def EmptyNodeList(): + return [] + + +try: + property +except NameError: + def defproperty(klass, name, doc): + # taken care of by the base __getattr__() + pass + + class GetattrMagic: + def __getattr__(self, key): + if key.startswith("_"): + raise AttributeError, key + + try: + get = getattr(self, "_get_" + key) + except AttributeError: + raise AttributeError, key + return get() + + class NewStyle: + pass + +else: + def defproperty(klass, name, doc): + get = getattr(klass, ("_get_" + name)).im_func + def set(self, value, name=name): + raise xmlcore.dom.NoModificationAllowedErr( + "attempt to modify read-only attribute " + repr(name)) + assert not hasattr(klass, "_set_" + name), \ + "expected not to find _set_" + name + prop = property(get, set, doc=doc) + setattr(klass, name, prop) + + class GetattrMagic: + pass + + NewStyle = object diff --git a/Lib/xmlcore/dom/minidom.py b/Lib/xmlcore/dom/minidom.py new file mode 100644 index 0000000..54620e1 --- /dev/null +++ b/Lib/xmlcore/dom/minidom.py @@ -0,0 +1,1938 @@ +"""\ +minidom.py -- a lightweight DOM implementation. + +parse("foo.xml") + +parseString("") + +Todo: +===== + * convenience methods for getting elements and text. + * more testing + * bring some of the writer and linearizer code into conformance with this + interface + * SAX 2 namespaces +""" + +import xmlcore.dom + +from xmlcore.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg +from xmlcore.dom.minicompat import * +from xmlcore.dom.xmlbuilder import DOMImplementationLS, DocumentLS + +_TupleType = type(()) + +# This is used by the ID-cache invalidation checks; the list isn't +# actually complete, since the nodes being checked will never be the +# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is +# the node being added or removed, not the node being modified.) +# +_nodeTypes_with_children = (xmlcore.dom.Node.ELEMENT_NODE, + xmlcore.dom.Node.ENTITY_REFERENCE_NODE) + + +class Node(xmlcore.dom.Node, GetattrMagic): + namespaceURI = None # this is non-null only for elements and attributes + parentNode = None + ownerDocument = None + nextSibling = None + previousSibling = None + + prefix = EMPTY_PREFIX # non-null only for NS elements and attributes + + def __nonzero__(self): + return True + + def toxml(self, encoding = None): + return self.toprettyxml("", "", encoding) + + def toprettyxml(self, indent="\t", newl="\n", encoding = None): + # indent = the indentation string to prepend, per level + # newl = the newline string to append + writer = _get_StringIO() + if encoding is not None: + import codecs + # Can't use codecs.getwriter to preserve 2.0 compatibility + writer = codecs.lookup(encoding)[3](writer) + if self.nodeType == Node.DOCUMENT_NODE: + # Can pass encoding only to document, to put it into XML header + self.writexml(writer, "", indent, newl, encoding) + else: + self.writexml(writer, "", indent, newl) + return writer.getvalue() + + def hasChildNodes(self): + if self.childNodes: + return True + else: + return False + + def _get_childNodes(self): + return self.childNodes + + def _get_firstChild(self): + if self.childNodes: + return self.childNodes[0] + + def _get_lastChild(self): + if self.childNodes: + return self.childNodes[-1] + + def insertBefore(self, newChild, refChild): + if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: + for c in tuple(newChild.childNodes): + self.insertBefore(c, refChild) + ### The DOM does not clearly specify what to return in this case + return newChild + if newChild.nodeType not in self._child_node_types: + raise xmlcore.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(newChild), repr(self))) + if newChild.parentNode is not None: + newChild.parentNode.removeChild(newChild) + if refChild is None: + self.appendChild(newChild) + else: + try: + index = self.childNodes.index(refChild) + except ValueError: + raise xmlcore.dom.NotFoundErr() + if newChild.nodeType in _nodeTypes_with_children: + _clear_id_cache(self) + self.childNodes.insert(index, newChild) + newChild.nextSibling = refChild + refChild.previousSibling = newChild + if index: + node = self.childNodes[index-1] + node.nextSibling = newChild + newChild.previousSibling = node + else: + newChild.previousSibling = None + newChild.parentNode = self + return newChild + + def appendChild(self, node): + if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: + for c in tuple(node.childNodes): + self.appendChild(c) + ### The DOM does not clearly specify what to return in this case + return node + if node.nodeType not in self._child_node_types: + raise xmlcore.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(node), repr(self))) + elif node.nodeType in _nodeTypes_with_children: + _clear_id_cache(self) + if node.parentNode is not None: + node.parentNode.removeChild(node) + _append_child(self, node) + node.nextSibling = None + return node + + def replaceChild(self, newChild, oldChild): + if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: + refChild = oldChild.nextSibling + self.removeChild(oldChild) + return self.insertBefore(newChild, refChild) + if newChild.nodeType not in self._child_node_types: + raise xmlcore.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(newChild), repr(self))) + if newChild is oldChild: + return + if newChild.parentNode is not None: + newChild.parentNode.removeChild(newChild) + try: + index = self.childNodes.index(oldChild) + except ValueError: + raise xmlcore.dom.NotFoundErr() + self.childNodes[index] = newChild + newChild.parentNode = self + oldChild.parentNode = None + if (newChild.nodeType in _nodeTypes_with_children + or oldChild.nodeType in _nodeTypes_with_children): + _clear_id_cache(self) + newChild.nextSibling = oldChild.nextSibling + newChild.previousSibling = oldChild.previousSibling + oldChild.nextSibling = None + oldChild.previousSibling = None + if newChild.previousSibling: + newChild.previousSibling.nextSibling = newChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + return oldChild + + def removeChild(self, oldChild): + try: + self.childNodes.remove(oldChild) + except ValueError: + raise xmlcore.dom.NotFoundErr() + if oldChild.nextSibling is not None: + oldChild.nextSibling.previousSibling = oldChild.previousSibling + if oldChild.previousSibling is not None: + oldChild.previousSibling.nextSibling = oldChild.nextSibling + oldChild.nextSibling = oldChild.previousSibling = None + if oldChild.nodeType in _nodeTypes_with_children: + _clear_id_cache(self) + + oldChild.parentNode = None + return oldChild + + def normalize(self): + L = [] + for child in self.childNodes: + if child.nodeType == Node.TEXT_NODE: + data = child.data + if data and L and L[-1].nodeType == child.nodeType: + # collapse text node + node = L[-1] + node.data = node.data + child.data + node.nextSibling = child.nextSibling + child.unlink() + elif data: + if L: + L[-1].nextSibling = child + child.previousSibling = L[-1] + else: + child.previousSibling = None + L.append(child) + else: + # empty text node; discard + child.unlink() + else: + if L: + L[-1].nextSibling = child + child.previousSibling = L[-1] + else: + child.previousSibling = None + L.append(child) + if child.nodeType == Node.ELEMENT_NODE: + child.normalize() + self.childNodes[:] = L + + def cloneNode(self, deep): + return _clone_node(self, deep, self.ownerDocument or self) + + def isSupported(self, feature, version): + return self.ownerDocument.implementation.hasFeature(feature, version) + + def _get_localName(self): + # Overridden in Element and Attr where localName can be Non-Null + return None + + # Node interfaces from Level 3 (WD 9 April 2002) + + def isSameNode(self, other): + return self is other + + def getInterface(self, feature): + if self.isSupported(feature, None): + return self + else: + return None + + # The "user data" functions use a dictionary that is only present + # if some user data has been set, so be careful not to assume it + # exists. + + def getUserData(self, key): + try: + return self._user_data[key][0] + except (AttributeError, KeyError): + return None + + def setUserData(self, key, data, handler): + old = None + try: + d = self._user_data + except AttributeError: + d = {} + self._user_data = d + if d.has_key(key): + old = d[key][0] + if data is None: + # ignore handlers passed for None + handler = None + if old is not None: + del d[key] + else: + d[key] = (data, handler) + return old + + def _call_user_data_handler(self, operation, src, dst): + if hasattr(self, "_user_data"): + for key, (data, handler) in self._user_data.items(): + if handler is not None: + handler.handle(operation, key, data, src, dst) + + # minidom-specific API: + + def unlink(self): + self.parentNode = self.ownerDocument = None + if self.childNodes: + for child in self.childNodes: + child.unlink() + self.childNodes = NodeList() + self.previousSibling = None + self.nextSibling = None + +defproperty(Node, "firstChild", doc="First child node, or None.") +defproperty(Node, "lastChild", doc="Last child node, or None.") +defproperty(Node, "localName", doc="Namespace-local name of this node.") + + +def _append_child(self, node): + # fast path with less checks; usable by DOM builders if careful + childNodes = self.childNodes + if childNodes: + last = childNodes[-1] + node.__dict__["previousSibling"] = last + last.__dict__["nextSibling"] = node + childNodes.append(node) + node.__dict__["parentNode"] = self + +def _in_document(node): + # return True iff node is part of a document tree + while node is not None: + if node.nodeType == Node.DOCUMENT_NODE: + return True + node = node.parentNode + return False + +def _write_data(writer, data): + "Writes datachars to writer." + data = data.replace("&", "&").replace("<", "<") + data = data.replace("\"", """).replace(">", ">") + writer.write(data) + +def _get_elements_by_tagName_helper(parent, name, rc): + for node in parent.childNodes: + if node.nodeType == Node.ELEMENT_NODE and \ + (name == "*" or node.tagName == name): + rc.append(node) + _get_elements_by_tagName_helper(node, name, rc) + return rc + +def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): + for node in parent.childNodes: + if node.nodeType == Node.ELEMENT_NODE: + if ((localName == "*" or node.localName == localName) and + (nsURI == "*" or node.namespaceURI == nsURI)): + rc.append(node) + _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) + return rc + +class DocumentFragment(Node): + nodeType = Node.DOCUMENT_FRAGMENT_NODE + nodeName = "#document-fragment" + nodeValue = None + attributes = None + parentNode = None + _child_node_types = (Node.ELEMENT_NODE, + Node.TEXT_NODE, + Node.CDATA_SECTION_NODE, + Node.ENTITY_REFERENCE_NODE, + Node.PROCESSING_INSTRUCTION_NODE, + Node.COMMENT_NODE, + Node.NOTATION_NODE) + + def __init__(self): + self.childNodes = NodeList() + + +class Attr(Node): + nodeType = Node.ATTRIBUTE_NODE + attributes = None + ownerElement = None + specified = False + _is_id = False + + _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) + + def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, + prefix=None): + # skip setattr for performance + d = self.__dict__ + d["nodeName"] = d["name"] = qName + d["namespaceURI"] = namespaceURI + d["prefix"] = prefix + d['childNodes'] = NodeList() + + # Add the single child node that represents the value of the attr + self.childNodes.append(Text()) + + # nodeValue and value are set elsewhere + + def _get_localName(self): + return self.nodeName.split(":", 1)[-1] + + def _get_name(self): + return self.name + + def _get_specified(self): + return self.specified + + def __setattr__(self, name, value): + d = self.__dict__ + if name in ("value", "nodeValue"): + d["value"] = d["nodeValue"] = value + d2 = self.childNodes[0].__dict__ + d2["data"] = d2["nodeValue"] = value + if self.ownerElement is not None: + _clear_id_cache(self.ownerElement) + elif name in ("name", "nodeName"): + d["name"] = d["nodeName"] = value + if self.ownerElement is not None: + _clear_id_cache(self.ownerElement) + else: + d[name] = value + + def _set_prefix(self, prefix): + nsuri = self.namespaceURI + if prefix == "xmlns": + if nsuri and nsuri != XMLNS_NAMESPACE: + raise xmlcore.dom.NamespaceErr( + "illegal use of 'xmlns' prefix for the wrong namespace") + d = self.__dict__ + d['prefix'] = prefix + if prefix is None: + newName = self.localName + else: + newName = "%s:%s" % (prefix, self.localName) + if self.ownerElement: + _clear_id_cache(self.ownerElement) + d['nodeName'] = d['name'] = newName + + def _set_value(self, value): + d = self.__dict__ + d['value'] = d['nodeValue'] = value + if self.ownerElement: + _clear_id_cache(self.ownerElement) + self.childNodes[0].data = value + + def unlink(self): + # This implementation does not call the base implementation + # since most of that is not needed, and the expense of the + # method call is not warranted. We duplicate the removal of + # children, but that's all we needed from the base class. + elem = self.ownerElement + if elem is not None: + del elem._attrs[self.nodeName] + del elem._attrsNS[(self.namespaceURI, self.localName)] + if self._is_id: + self._is_id = False + elem._magic_id_nodes -= 1 + self.ownerDocument._magic_id_count -= 1 + for child in self.childNodes: + child.unlink() + del self.childNodes[:] + + def _get_isId(self): + if self._is_id: + return True + doc = self.ownerDocument + elem = self.ownerElement + if doc is None or elem is None: + return False + + info = doc._get_elem_info(elem) + if info is None: + return False + if self.namespaceURI: + return info.isIdNS(self.namespaceURI, self.localName) + else: + return info.isId(self.nodeName) + + def _get_schemaType(self): + doc = self.ownerDocument + elem = self.ownerElement + if doc is None or elem is None: + return _no_type + + info = doc._get_elem_info(elem) + if info is None: + return _no_type + if self.namespaceURI: + return info.getAttributeTypeNS(self.namespaceURI, self.localName) + else: + return info.getAttributeType(self.nodeName) + +defproperty(Attr, "isId", doc="True if this attribute is an ID.") +defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") +defproperty(Attr, "schemaType", doc="Schema type for this attribute.") + + +class NamedNodeMap(NewStyle, GetattrMagic): + """The attribute list is a transient interface to the underlying + dictionaries. Mutations here will change the underlying element's + dictionary. + + Ordering is imposed artificially and does not reflect the order of + attributes as found in an input document. + """ + + __slots__ = ('_attrs', '_attrsNS', '_ownerElement') + + def __init__(self, attrs, attrsNS, ownerElement): + self._attrs = attrs + self._attrsNS = attrsNS + self._ownerElement = ownerElement + + def _get_length(self): + return len(self._attrs) + + def item(self, index): + try: + return self[self._attrs.keys()[index]] + except IndexError: + return None + + def items(self): + L = [] + for node in self._attrs.values(): + L.append((node.nodeName, node.value)) + return L + + def itemsNS(self): + L = [] + for node in self._attrs.values(): + L.append(((node.namespaceURI, node.localName), node.value)) + return L + + def has_key(self, key): + if isinstance(key, StringTypes): + return self._attrs.has_key(key) + else: + return self._attrsNS.has_key(key) + + def keys(self): + return self._attrs.keys() + + def keysNS(self): + return self._attrsNS.keys() + + def values(self): + return self._attrs.values() + + def get(self, name, value=None): + return self._attrs.get(name, value) + + __len__ = _get_length + + def __cmp__(self, other): + if self._attrs is getattr(other, "_attrs", None): + return 0 + else: + return cmp(id(self), id(other)) + + def __getitem__(self, attname_or_tuple): + if isinstance(attname_or_tuple, _TupleType): + return self._attrsNS[attname_or_tuple] + else: + return self._attrs[attname_or_tuple] + + # same as set + def __setitem__(self, attname, value): + if isinstance(value, StringTypes): + try: + node = self._attrs[attname] + except KeyError: + node = Attr(attname) + node.ownerDocument = self._ownerElement.ownerDocument + self.setNamedItem(node) + node.value = value + else: + if not isinstance(value, Attr): + raise TypeError, "value must be a string or Attr object" + node = value + self.setNamedItem(node) + + def getNamedItem(self, name): + try: + return self._attrs[name] + except KeyError: + return None + + def getNamedItemNS(self, namespaceURI, localName): + try: + return self._attrsNS[(namespaceURI, localName)] + except KeyError: + return None + + def removeNamedItem(self, name): + n = self.getNamedItem(name) + if n is not None: + _clear_id_cache(self._ownerElement) + del self._attrs[n.nodeName] + del self._attrsNS[(n.namespaceURI, n.localName)] + if n.__dict__.has_key('ownerElement'): + n.__dict__['ownerElement'] = None + return n + else: + raise xmlcore.dom.NotFoundErr() + + def removeNamedItemNS(self, namespaceURI, localName): + n = self.getNamedItemNS(namespaceURI, localName) + if n is not None: + _clear_id_cache(self._ownerElement) + del self._attrsNS[(n.namespaceURI, n.localName)] + del self._attrs[n.nodeName] + if n.__dict__.has_key('ownerElement'): + n.__dict__['ownerElement'] = None + return n + else: + raise xmlcore.dom.NotFoundErr() + + def setNamedItem(self, node): + if not isinstance(node, Attr): + raise xmlcore.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(node), repr(self))) + old = self._attrs.get(node.name) + if old: + old.unlink() + self._attrs[node.name] = node + self._attrsNS[(node.namespaceURI, node.localName)] = node + node.ownerElement = self._ownerElement + _clear_id_cache(node.ownerElement) + return old + + def setNamedItemNS(self, node): + return self.setNamedItem(node) + + def __delitem__(self, attname_or_tuple): + node = self[attname_or_tuple] + _clear_id_cache(node.ownerElement) + node.unlink() + + def __getstate__(self): + return self._attrs, self._attrsNS, self._ownerElement + + def __setstate__(self, state): + self._attrs, self._attrsNS, self._ownerElement = state + +defproperty(NamedNodeMap, "length", + doc="Number of nodes in the NamedNodeMap.") + +AttributeList = NamedNodeMap + + +class TypeInfo(NewStyle): + __slots__ = 'namespace', 'name' + + def __init__(self, namespace, name): + self.namespace = namespace + self.name = name + + def __repr__(self): + if self.namespace: + return "" % (self.name, self.namespace) + else: + return "" % self.name + + def _get_name(self): + return self.name + + def _get_namespace(self): + return self.namespace + +_no_type = TypeInfo(None, None) + +class Element(Node): + nodeType = Node.ELEMENT_NODE + nodeValue = None + schemaType = _no_type + + _magic_id_nodes = 0 + + _child_node_types = (Node.ELEMENT_NODE, + Node.PROCESSING_INSTRUCTION_NODE, + Node.COMMENT_NODE, + Node.TEXT_NODE, + Node.CDATA_SECTION_NODE, + Node.ENTITY_REFERENCE_NODE) + + def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, + localName=None): + self.tagName = self.nodeName = tagName + self.prefix = prefix + self.namespaceURI = namespaceURI + self.childNodes = NodeList() + + self._attrs = {} # attributes are double-indexed: + self._attrsNS = {} # tagName -> Attribute + # URI,localName -> Attribute + # in the future: consider lazy generation + # of attribute objects this is too tricky + # for now because of headaches with + # namespaces. + + def _get_localName(self): + return self.tagName.split(":", 1)[-1] + + def _get_tagName(self): + return self.tagName + + def unlink(self): + for attr in self._attrs.values(): + attr.unlink() + self._attrs = None + self._attrsNS = None + Node.unlink(self) + + def getAttribute(self, attname): + try: + return self._attrs[attname].value + except KeyError: + return "" + + def getAttributeNS(self, namespaceURI, localName): + try: + return self._attrsNS[(namespaceURI, localName)].value + except KeyError: + return "" + + def setAttribute(self, attname, value): + attr = self.getAttributeNode(attname) + if attr is None: + attr = Attr(attname) + # for performance + d = attr.__dict__ + d["value"] = d["nodeValue"] = value + d["ownerDocument"] = self.ownerDocument + self.setAttributeNode(attr) + elif value != attr.value: + d = attr.__dict__ + d["value"] = d["nodeValue"] = value + if attr.isId: + _clear_id_cache(self) + + def setAttributeNS(self, namespaceURI, qualifiedName, value): + prefix, localname = _nssplit(qualifiedName) + attr = self.getAttributeNodeNS(namespaceURI, localname) + if attr is None: + # for performance + attr = Attr(qualifiedName, namespaceURI, localname, prefix) + d = attr.__dict__ + d["prefix"] = prefix + d["nodeName"] = qualifiedName + d["value"] = d["nodeValue"] = value + d["ownerDocument"] = self.ownerDocument + self.setAttributeNode(attr) + else: + d = attr.__dict__ + if value != attr.value: + d["value"] = d["nodeValue"] = value + if attr.isId: + _clear_id_cache(self) + if attr.prefix != prefix: + d["prefix"] = prefix + d["nodeName"] = qualifiedName + + def getAttributeNode(self, attrname): + return self._attrs.get(attrname) + + def getAttributeNodeNS(self, namespaceURI, localName): + return self._attrsNS.get((namespaceURI, localName)) + + def setAttributeNode(self, attr): + if attr.ownerElement not in (None, self): + raise xmlcore.dom.InuseAttributeErr("attribute node already owned") + old1 = self._attrs.get(attr.name, None) + if old1 is not None: + self.removeAttributeNode(old1) + old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) + if old2 is not None and old2 is not old1: + self.removeAttributeNode(old2) + _set_attribute_node(self, attr) + + if old1 is not attr: + # It might have already been part of this node, in which case + # it doesn't represent a change, and should not be returned. + return old1 + if old2 is not attr: + return old2 + + setAttributeNodeNS = setAttributeNode + + def removeAttribute(self, name): + try: + attr = self._attrs[name] + except KeyError: + raise xmlcore.dom.NotFoundErr() + self.removeAttributeNode(attr) + + def removeAttributeNS(self, namespaceURI, localName): + try: + attr = self._attrsNS[(namespaceURI, localName)] + except KeyError: + raise xmlcore.dom.NotFoundErr() + self.removeAttributeNode(attr) + + def removeAttributeNode(self, node): + if node is None: + raise xmlcore.dom.NotFoundErr() + try: + self._attrs[node.name] + except KeyError: + raise xmlcore.dom.NotFoundErr() + _clear_id_cache(self) + node.unlink() + # Restore this since the node is still useful and otherwise + # unlinked + node.ownerDocument = self.ownerDocument + + removeAttributeNodeNS = removeAttributeNode + + def hasAttribute(self, name): + return self._attrs.has_key(name) + + def hasAttributeNS(self, namespaceURI, localName): + return self._attrsNS.has_key((namespaceURI, localName)) + + def getElementsByTagName(self, name): + return _get_elements_by_tagName_helper(self, name, NodeList()) + + def getElementsByTagNameNS(self, namespaceURI, localName): + return _get_elements_by_tagName_ns_helper( + self, namespaceURI, localName, NodeList()) + + def __repr__(self): + return "" % (self.tagName, id(self)) + + def writexml(self, writer, indent="", addindent="", newl=""): + # indent = current indentation + # addindent = indentation to add to higher levels + # newl = newline string + writer.write(indent+"<" + self.tagName) + + attrs = self._get_attributes() + a_names = attrs.keys() + a_names.sort() + + for a_name in a_names: + writer.write(" %s=\"" % a_name) + _write_data(writer, attrs[a_name].value) + writer.write("\"") + if self.childNodes: + writer.write(">%s"%(newl)) + for node in self.childNodes: + node.writexml(writer,indent+addindent,addindent,newl) + writer.write("%s%s" % (indent,self.tagName,newl)) + else: + writer.write("/>%s"%(newl)) + + def _get_attributes(self): + return NamedNodeMap(self._attrs, self._attrsNS, self) + + def hasAttributes(self): + if self._attrs: + return True + else: + return False + + # DOM Level 3 attributes, based on the 22 Oct 2002 draft + + def setIdAttribute(self, name): + idAttr = self.getAttributeNode(name) + self.setIdAttributeNode(idAttr) + + def setIdAttributeNS(self, namespaceURI, localName): + idAttr = self.getAttributeNodeNS(namespaceURI, localName) + self.setIdAttributeNode(idAttr) + + def setIdAttributeNode(self, idAttr): + if idAttr is None or not self.isSameNode(idAttr.ownerElement): + raise xmlcore.dom.NotFoundErr() + if _get_containing_entref(self) is not None: + raise xmlcore.dom.NoModificationAllowedErr() + if not idAttr._is_id: + idAttr.__dict__['_is_id'] = True + self._magic_id_nodes += 1 + self.ownerDocument._magic_id_count += 1 + _clear_id_cache(self) + +defproperty(Element, "attributes", + doc="NamedNodeMap of attributes on the element.") +defproperty(Element, "localName", + doc="Namespace-local name of this element.") + + +def _set_attribute_node(element, attr): + _clear_id_cache(element) + element._attrs[attr.name] = attr + element._attrsNS[(attr.namespaceURI, attr.localName)] = attr + + # This creates a circular reference, but Element.unlink() + # breaks the cycle since the references to the attribute + # dictionaries are tossed. + attr.__dict__['ownerElement'] = element + + +class Childless: + """Mixin that makes childless-ness easy to implement and avoids + the complexity of the Node methods that deal with children. + """ + + attributes = None + childNodes = EmptyNodeList() + firstChild = None + lastChild = None + + def _get_firstChild(self): + return None + + def _get_lastChild(self): + return None + + def appendChild(self, node): + raise xmlcore.dom.HierarchyRequestErr( + self.nodeName + " nodes cannot have children") + + def hasChildNodes(self): + return False + + def insertBefore(self, newChild, refChild): + raise xmlcore.dom.HierarchyRequestErr( + self.nodeName + " nodes do not have children") + + def removeChild(self, oldChild): + raise xmlcore.dom.NotFoundErr( + self.nodeName + " nodes do not have children") + + def replaceChild(self, newChild, oldChild): + raise xmlcore.dom.HierarchyRequestErr( + self.nodeName + " nodes do not have children") + + +class ProcessingInstruction(Childless, Node): + nodeType = Node.PROCESSING_INSTRUCTION_NODE + + def __init__(self, target, data): + self.target = self.nodeName = target + self.data = self.nodeValue = data + + def _get_data(self): + return self.data + def _set_data(self, value): + d = self.__dict__ + d['data'] = d['nodeValue'] = value + + def _get_target(self): + return self.target + def _set_target(self, value): + d = self.__dict__ + d['target'] = d['nodeName'] = value + + def __setattr__(self, name, value): + if name == "data" or name == "nodeValue": + self.__dict__['data'] = self.__dict__['nodeValue'] = value + elif name == "target" or name == "nodeName": + self.__dict__['target'] = self.__dict__['nodeName'] = value + else: + self.__dict__[name] = value + + def writexml(self, writer, indent="", addindent="", newl=""): + writer.write("%s%s" % (indent,self.target, self.data, newl)) + + +class CharacterData(Childless, Node): + def _get_length(self): + return len(self.data) + __len__ = _get_length + + def _get_data(self): + return self.__dict__['data'] + def _set_data(self, data): + d = self.__dict__ + d['data'] = d['nodeValue'] = data + + _get_nodeValue = _get_data + _set_nodeValue = _set_data + + def __setattr__(self, name, value): + if name == "data" or name == "nodeValue": + self.__dict__['data'] = self.__dict__['nodeValue'] = value + else: + self.__dict__[name] = value + + def __repr__(self): + data = self.data + if len(data) > 10: + dotdotdot = "..." + else: + dotdotdot = "" + return "" % ( + self.__class__.__name__, data[0:10], dotdotdot) + + def substringData(self, offset, count): + if offset < 0: + raise xmlcore.dom.IndexSizeErr("offset cannot be negative") + if offset >= len(self.data): + raise xmlcore.dom.IndexSizeErr("offset cannot be beyond end of data") + if count < 0: + raise xmlcore.dom.IndexSizeErr("count cannot be negative") + return self.data[offset:offset+count] + + def appendData(self, arg): + self.data = self.data + arg + + def insertData(self, offset, arg): + if offset < 0: + raise xmlcore.dom.IndexSizeErr("offset cannot be negative") + if offset >= len(self.data): + raise xmlcore.dom.IndexSizeErr("offset cannot be beyond end of data") + if arg: + self.data = "%s%s%s" % ( + self.data[:offset], arg, self.data[offset:]) + + def deleteData(self, offset, count): + if offset < 0: + raise xmlcore.dom.IndexSizeErr("offset cannot be negative") + if offset >= len(self.data): + raise xmlcore.dom.IndexSizeErr("offset cannot be beyond end of data") + if count < 0: + raise xmlcore.dom.IndexSizeErr("count cannot be negative") + if count: + self.data = self.data[:offset] + self.data[offset+count:] + + def replaceData(self, offset, count, arg): + if offset < 0: + raise xmlcore.dom.IndexSizeErr("offset cannot be negative") + if offset >= len(self.data): + raise xmlcore.dom.IndexSizeErr("offset cannot be beyond end of data") + if count < 0: + raise xmlcore.dom.IndexSizeErr("count cannot be negative") + if count: + self.data = "%s%s%s" % ( + self.data[:offset], arg, self.data[offset+count:]) + +defproperty(CharacterData, "length", doc="Length of the string data.") + + +class Text(CharacterData): + # Make sure we don't add an instance __dict__ if we don't already + # have one, at least when that's possible: + # XXX this does not work, CharacterData is an old-style class + # __slots__ = () + + nodeType = Node.TEXT_NODE + nodeName = "#text" + attributes = None + + def splitText(self, offset): + if offset < 0 or offset > len(self.data): + raise xmlcore.dom.IndexSizeErr("illegal offset value") + newText = self.__class__() + newText.data = self.data[offset:] + newText.ownerDocument = self.ownerDocument + next = self.nextSibling + if self.parentNode and self in self.parentNode.childNodes: + if next is None: + self.parentNode.appendChild(newText) + else: + self.parentNode.insertBefore(newText, next) + self.data = self.data[:offset] + return newText + + def writexml(self, writer, indent="", addindent="", newl=""): + _write_data(writer, "%s%s%s"%(indent, self.data, newl)) + + # DOM Level 3 (WD 9 April 2002) + + def _get_wholeText(self): + L = [self.data] + n = self.previousSibling + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + L.insert(0, n.data) + n = n.previousSibling + else: + break + n = self.nextSibling + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + L.append(n.data) + n = n.nextSibling + else: + break + return ''.join(L) + + def replaceWholeText(self, content): + # XXX This needs to be seriously changed if minidom ever + # supports EntityReference nodes. + parent = self.parentNode + n = self.previousSibling + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + next = n.previousSibling + parent.removeChild(n) + n = next + else: + break + n = self.nextSibling + if not content: + parent.removeChild(self) + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + next = n.nextSibling + parent.removeChild(n) + n = next + else: + break + if content: + d = self.__dict__ + d['data'] = content + d['nodeValue'] = content + return self + else: + return None + + def _get_isWhitespaceInElementContent(self): + if self.data.strip(): + return False + elem = _get_containing_element(self) + if elem is None: + return False + info = self.ownerDocument._get_elem_info(elem) + if info is None: + return False + else: + return info.isElementContent() + +defproperty(Text, "isWhitespaceInElementContent", + doc="True iff this text node contains only whitespace" + " and is in element content.") +defproperty(Text, "wholeText", + doc="The text of all logically-adjacent text nodes.") + + +def _get_containing_element(node): + c = node.parentNode + while c is not None: + if c.nodeType == Node.ELEMENT_NODE: + return c + c = c.parentNode + return None + +def _get_containing_entref(node): + c = node.parentNode + while c is not None: + if c.nodeType == Node.ENTITY_REFERENCE_NODE: + return c + c = c.parentNode + return None + + +class Comment(Childless, CharacterData): + nodeType = Node.COMMENT_NODE + nodeName = "#comment" + + def __init__(self, data): + self.data = self.nodeValue = data + + def writexml(self, writer, indent="", addindent="", newl=""): + writer.write("%s%s" % (indent, self.data, newl)) + + +class CDATASection(Text): + # Make sure we don't add an instance __dict__ if we don't already + # have one, at least when that's possible: + # XXX this does not work, Text is an old-style class + # __slots__ = () + + nodeType = Node.CDATA_SECTION_NODE + nodeName = "#cdata-section" + + def writexml(self, writer, indent="", addindent="", newl=""): + if self.data.find("]]>") >= 0: + raise ValueError("']]>' not allowed in a CDATA section") + writer.write("" % self.data) + + +class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic): + __slots__ = '_seq', + + def __init__(self, seq=()): + # seq should be a list or tuple + self._seq = seq + + def __len__(self): + return len(self._seq) + + def _get_length(self): + return len(self._seq) + + def getNamedItem(self, name): + for n in self._seq: + if n.nodeName == name: + return n + + def getNamedItemNS(self, namespaceURI, localName): + for n in self._seq: + if n.namespaceURI == namespaceURI and n.localName == localName: + return n + + def __getitem__(self, name_or_tuple): + if isinstance(name_or_tuple, _TupleType): + node = self.getNamedItemNS(*name_or_tuple) + else: + node = self.getNamedItem(name_or_tuple) + if node is None: + raise KeyError, name_or_tuple + return node + + def item(self, index): + if index < 0: + return None + try: + return self._seq[index] + except IndexError: + return None + + def removeNamedItem(self, name): + raise xmlcore.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def removeNamedItemNS(self, namespaceURI, localName): + raise xmlcore.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def setNamedItem(self, node): + raise xmlcore.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def setNamedItemNS(self, node): + raise xmlcore.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def __getstate__(self): + return [self._seq] + + def __setstate__(self, state): + self._seq = state[0] + +defproperty(ReadOnlySequentialNamedNodeMap, "length", + doc="Number of entries in the NamedNodeMap.") + + +class Identified: + """Mix-in class that supports the publicId and systemId attributes.""" + + # XXX this does not work, this is an old-style class + # __slots__ = 'publicId', 'systemId' + + def _identified_mixin_init(self, publicId, systemId): + self.publicId = publicId + self.systemId = systemId + + def _get_publicId(self): + return self.publicId + + def _get_systemId(self): + return self.systemId + +class DocumentType(Identified, Childless, Node): + nodeType = Node.DOCUMENT_TYPE_NODE + nodeValue = None + name = None + publicId = None + systemId = None + internalSubset = None + + def __init__(self, qualifiedName): + self.entities = ReadOnlySequentialNamedNodeMap() + self.notations = ReadOnlySequentialNamedNodeMap() + if qualifiedName: + prefix, localname = _nssplit(qualifiedName) + self.name = localname + self.nodeName = self.name + + def _get_internalSubset(self): + return self.internalSubset + + def cloneNode(self, deep): + if self.ownerDocument is None: + # it's ok + clone = DocumentType(None) + clone.name = self.name + clone.nodeName = self.name + operation = xmlcore.dom.UserDataHandler.NODE_CLONED + if deep: + clone.entities._seq = [] + clone.notations._seq = [] + for n in self.notations._seq: + notation = Notation(n.nodeName, n.publicId, n.systemId) + clone.notations._seq.append(notation) + n._call_user_data_handler(operation, n, notation) + for e in self.entities._seq: + entity = Entity(e.nodeName, e.publicId, e.systemId, + e.notationName) + entity.actualEncoding = e.actualEncoding + entity.encoding = e.encoding + entity.version = e.version + clone.entities._seq.append(entity) + e._call_user_data_handler(operation, n, entity) + self._call_user_data_handler(operation, self, clone) + return clone + else: + return None + + def writexml(self, writer, indent="", addindent="", newl=""): + writer.write(""+newl) + +class Entity(Identified, Node): + attributes = None + nodeType = Node.ENTITY_NODE + nodeValue = None + + actualEncoding = None + encoding = None + version = None + + def __init__(self, name, publicId, systemId, notation): + self.nodeName = name + self.notationName = notation + self.childNodes = NodeList() + self._identified_mixin_init(publicId, systemId) + + def _get_actualEncoding(self): + return self.actualEncoding + + def _get_encoding(self): + return self.encoding + + def _get_version(self): + return self.version + + def appendChild(self, newChild): + raise xmlcore.dom.HierarchyRequestErr( + "cannot append children to an entity node") + + def insertBefore(self, newChild, refChild): + raise xmlcore.dom.HierarchyRequestErr( + "cannot insert children below an entity node") + + def removeChild(self, oldChild): + raise xmlcore.dom.HierarchyRequestErr( + "cannot remove children from an entity node") + + def replaceChild(self, newChild, oldChild): + raise xmlcore.dom.HierarchyRequestErr( + "cannot replace children of an entity node") + +class Notation(Identified, Childless, Node): + nodeType = Node.NOTATION_NODE + nodeValue = None + + def __init__(self, name, publicId, systemId): + self.nodeName = name + self._identified_mixin_init(publicId, systemId) + + +class DOMImplementation(DOMImplementationLS): + _features = [("core", "1.0"), + ("core", "2.0"), + ("core", "3.0"), + ("core", None), + ("xml", "1.0"), + ("xml", "2.0"), + ("xml", "3.0"), + ("xml", None), + ("ls-load", "3.0"), + ("ls-load", None), + ] + + def hasFeature(self, feature, version): + if version == "": + version = None + return (feature.lower(), version) in self._features + + def createDocument(self, namespaceURI, qualifiedName, doctype): + if doctype and doctype.parentNode is not None: + raise xmlcore.dom.WrongDocumentErr( + "doctype object owned by another DOM tree") + doc = self._create_document() + + add_root_element = not (namespaceURI is None + and qualifiedName is None + and doctype is None) + + if not qualifiedName and add_root_element: + # The spec is unclear what to raise here; SyntaxErr + # would be the other obvious candidate. Since Xerces raises + # InvalidCharacterErr, and since SyntaxErr is not listed + # for createDocument, that seems to be the better choice. + # XXX: need to check for illegal characters here and in + # createElement. + + # DOM Level III clears this up when talking about the return value + # of this function. If namespaceURI, qName and DocType are + # Null the document is returned without a document element + # Otherwise if doctype or namespaceURI are not None + # Then we go back to the above problem + raise xmlcore.dom.InvalidCharacterErr("Element with no name") + + if add_root_element: + prefix, localname = _nssplit(qualifiedName) + if prefix == "xml" \ + and namespaceURI != "http://www.w3.org/XML/1998/namespace": + raise xmlcore.dom.NamespaceErr("illegal use of 'xml' prefix") + if prefix and not namespaceURI: + raise xmlcore.dom.NamespaceErr( + "illegal use of prefix without namespaces") + element = doc.createElementNS(namespaceURI, qualifiedName) + if doctype: + doc.appendChild(doctype) + doc.appendChild(element) + + if doctype: + doctype.parentNode = doctype.ownerDocument = doc + + doc.doctype = doctype + doc.implementation = self + return doc + + def createDocumentType(self, qualifiedName, publicId, systemId): + doctype = DocumentType(qualifiedName) + doctype.publicId = publicId + doctype.systemId = systemId + return doctype + + # DOM Level 3 (WD 9 April 2002) + + def getInterface(self, feature): + if self.hasFeature(feature, None): + return self + else: + return None + + # internal + def _create_document(self): + return Document() + +class ElementInfo(NewStyle): + """Object that represents content-model information for an element. + + This implementation is not expected to be used in practice; DOM + builders should provide implementations which do the right thing + using information available to it. + + """ + + __slots__ = 'tagName', + + def __init__(self, name): + self.tagName = name + + def getAttributeType(self, aname): + return _no_type + + def getAttributeTypeNS(self, namespaceURI, localName): + return _no_type + + def isElementContent(self): + return False + + def isEmpty(self): + """Returns true iff this element is declared to have an EMPTY + content model.""" + return False + + def isId(self, aname): + """Returns true iff the named attribte is a DTD-style ID.""" + return False + + def isIdNS(self, namespaceURI, localName): + """Returns true iff the identified attribute is a DTD-style ID.""" + return False + + def __getstate__(self): + return self.tagName + + def __setstate__(self, state): + self.tagName = state + +def _clear_id_cache(node): + if node.nodeType == Node.DOCUMENT_NODE: + node._id_cache.clear() + node._id_search_stack = None + elif _in_document(node): + node.ownerDocument._id_cache.clear() + node.ownerDocument._id_search_stack= None + +class Document(Node, DocumentLS): + _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, + Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) + + nodeType = Node.DOCUMENT_NODE + nodeName = "#document" + nodeValue = None + attributes = None + doctype = None + parentNode = None + previousSibling = nextSibling = None + + implementation = DOMImplementation() + + # Document attributes from Level 3 (WD 9 April 2002) + + actualEncoding = None + encoding = None + standalone = None + version = None + strictErrorChecking = False + errorHandler = None + documentURI = None + + _magic_id_count = 0 + + def __init__(self): + self.childNodes = NodeList() + # mapping of (namespaceURI, localName) -> ElementInfo + # and tagName -> ElementInfo + self._elem_info = {} + self._id_cache = {} + self._id_search_stack = None + + def _get_elem_info(self, element): + if element.namespaceURI: + key = element.namespaceURI, element.localName + else: + key = element.tagName + return self._elem_info.get(key) + + def _get_actualEncoding(self): + return self.actualEncoding + + def _get_doctype(self): + return self.doctype + + def _get_documentURI(self): + return self.documentURI + + def _get_encoding(self): + return self.encoding + + def _get_errorHandler(self): + return self.errorHandler + + def _get_standalone(self): + return self.standalone + + def _get_strictErrorChecking(self): + return self.strictErrorChecking + + def _get_version(self): + return self.version + + def appendChild(self, node): + if node.nodeType not in self._child_node_types: + raise xmlcore.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(node), repr(self))) + if node.parentNode is not None: + # This needs to be done before the next test since this + # may *be* the document element, in which case it should + # end up re-ordered to the end. + node.parentNode.removeChild(node) + + if node.nodeType == Node.ELEMENT_NODE \ + and self._get_documentElement(): + raise xmlcore.dom.HierarchyRequestErr( + "two document elements disallowed") + return Node.appendChild(self, node) + + def removeChild(self, oldChild): + try: + self.childNodes.remove(oldChild) + except ValueError: + raise xmlcore.dom.NotFoundErr() + oldChild.nextSibling = oldChild.previousSibling = None + oldChild.parentNode = None + if self.documentElement is oldChild: + self.documentElement = None + + return oldChild + + def _get_documentElement(self): + for node in self.childNodes: + if node.nodeType == Node.ELEMENT_NODE: + return node + + def unlink(self): + if self.doctype is not None: + self.doctype.unlink() + self.doctype = None + Node.unlink(self) + + def cloneNode(self, deep): + if not deep: + return None + clone = self.implementation.createDocument(None, None, None) + clone.encoding = self.encoding + clone.standalone = self.standalone + clone.version = self.version + for n in self.childNodes: + childclone = _clone_node(n, deep, clone) + assert childclone.ownerDocument.isSameNode(clone) + clone.childNodes.append(childclone) + if childclone.nodeType == Node.DOCUMENT_NODE: + assert clone.documentElement is None + elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: + assert clone.doctype is None + clone.doctype = childclone + childclone.parentNode = clone + self._call_user_data_handler(xmlcore.dom.UserDataHandler.NODE_CLONED, + self, clone) + return clone + + def createDocumentFragment(self): + d = DocumentFragment() + d.ownerDocument = self + return d + + def createElement(self, tagName): + e = Element(tagName) + e.ownerDocument = self + return e + + def createTextNode(self, data): + if not isinstance(data, StringTypes): + raise TypeError, "node contents must be a string" + t = Text() + t.data = data + t.ownerDocument = self + return t + + def createCDATASection(self, data): + if not isinstance(data, StringTypes): + raise TypeError, "node contents must be a string" + c = CDATASection() + c.data = data + c.ownerDocument = self + return c + + def createComment(self, data): + c = Comment(data) + c.ownerDocument = self + return c + + def createProcessingInstruction(self, target, data): + p = ProcessingInstruction(target, data) + p.ownerDocument = self + return p + + def createAttribute(self, qName): + a = Attr(qName) + a.ownerDocument = self + a.value = "" + return a + + def createElementNS(self, namespaceURI, qualifiedName): + prefix, localName = _nssplit(qualifiedName) + e = Element(qualifiedName, namespaceURI, prefix) + e.ownerDocument = self + return e + + def createAttributeNS(self, namespaceURI, qualifiedName): + prefix, localName = _nssplit(qualifiedName) + a = Attr(qualifiedName, namespaceURI, localName, prefix) + a.ownerDocument = self + a.value = "" + return a + + # A couple of implementation-specific helpers to create node types + # not supported by the W3C DOM specs: + + def _create_entity(self, name, publicId, systemId, notationName): + e = Entity(name, publicId, systemId, notationName) + e.ownerDocument = self + return e + + def _create_notation(self, name, publicId, systemId): + n = Notation(name, publicId, systemId) + n.ownerDocument = self + return n + + def getElementById(self, id): + if self._id_cache.has_key(id): + return self._id_cache[id] + if not (self._elem_info or self._magic_id_count): + return None + + stack = self._id_search_stack + if stack is None: + # we never searched before, or the cache has been cleared + stack = [self.documentElement] + self._id_search_stack = stack + elif not stack: + # Previous search was completed and cache is still valid; + # no matching node. + return None + + result = None + while stack: + node = stack.pop() + # add child elements to stack for continued searching + stack.extend([child for child in node.childNodes + if child.nodeType in _nodeTypes_with_children]) + # check this node + info = self._get_elem_info(node) + if info: + # We have to process all ID attributes before + # returning in order to get all the attributes set to + # be IDs using Element.setIdAttribute*(). + for attr in node.attributes.values(): + if attr.namespaceURI: + if info.isIdNS(attr.namespaceURI, attr.localName): + self._id_cache[attr.value] = node + if attr.value == id: + result = node + elif not node._magic_id_nodes: + break + elif info.isId(attr.name): + self._id_cache[attr.value] = node + if attr.value == id: + result = node + elif not node._magic_id_nodes: + break + elif attr._is_id: + self._id_cache[attr.value] = node + if attr.value == id: + result = node + elif node._magic_id_nodes == 1: + break + elif node._magic_id_nodes: + for attr in node.attributes.values(): + if attr._is_id: + self._id_cache[attr.value] = node + if attr.value == id: + result = node + if result is not None: + break + return result + + def getElementsByTagName(self, name): + return _get_elements_by_tagName_helper(self, name, NodeList()) + + def getElementsByTagNameNS(self, namespaceURI, localName): + return _get_elements_by_tagName_ns_helper( + self, namespaceURI, localName, NodeList()) + + def isSupported(self, feature, version): + return self.implementation.hasFeature(feature, version) + + def importNode(self, node, deep): + if node.nodeType == Node.DOCUMENT_NODE: + raise xmlcore.dom.NotSupportedErr("cannot import document nodes") + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + raise xmlcore.dom.NotSupportedErr("cannot import document type nodes") + return _clone_node(node, deep, self) + + def writexml(self, writer, indent="", addindent="", newl="", + encoding = None): + if encoding is None: + writer.write(''+newl) + else: + writer.write('%s' % (encoding, newl)) + for node in self.childNodes: + node.writexml(writer, indent, addindent, newl) + + # DOM Level 3 (WD 9 April 2002) + + def renameNode(self, n, namespaceURI, name): + if n.ownerDocument is not self: + raise xmlcore.dom.WrongDocumentErr( + "cannot rename nodes from other documents;\n" + "expected %s,\nfound %s" % (self, n.ownerDocument)) + if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): + raise xmlcore.dom.NotSupportedErr( + "renameNode() only applies to element and attribute nodes") + if namespaceURI != EMPTY_NAMESPACE: + if ':' in name: + prefix, localName = name.split(':', 1) + if ( prefix == "xmlns" + and namespaceURI != xmlcore.dom.XMLNS_NAMESPACE): + raise xmlcore.dom.NamespaceErr( + "illegal use of 'xmlns' prefix") + else: + if ( name == "xmlns" + and namespaceURI != xmlcore.dom.XMLNS_NAMESPACE + and n.nodeType == Node.ATTRIBUTE_NODE): + raise xmlcore.dom.NamespaceErr( + "illegal use of the 'xmlns' attribute") + prefix = None + localName = name + else: + prefix = None + localName = None + if n.nodeType == Node.ATTRIBUTE_NODE: + element = n.ownerElement + if element is not None: + is_id = n._is_id + element.removeAttributeNode(n) + else: + element = None + # avoid __setattr__ + d = n.__dict__ + d['prefix'] = prefix + d['localName'] = localName + d['namespaceURI'] = namespaceURI + d['nodeName'] = name + if n.nodeType == Node.ELEMENT_NODE: + d['tagName'] = name + else: + # attribute node + d['name'] = name + if element is not None: + element.setAttributeNode(n) + if is_id: + element.setIdAttributeNode(n) + # It's not clear from a semantic perspective whether we should + # call the user data handlers for the NODE_RENAMED event since + # we're re-using the existing node. The draft spec has been + # interpreted as meaning "no, don't call the handler unless a + # new node is created." + return n + +defproperty(Document, "documentElement", + doc="Top-level element of this document.") + + +def _clone_node(node, deep, newOwnerDocument): + """ + Clone a node and give it the new owner document. + Called by Node.cloneNode and Document.importNode + """ + if node.ownerDocument.isSameNode(newOwnerDocument): + operation = xmlcore.dom.UserDataHandler.NODE_CLONED + else: + operation = xmlcore.dom.UserDataHandler.NODE_IMPORTED + if node.nodeType == Node.ELEMENT_NODE: + clone = newOwnerDocument.createElementNS(node.namespaceURI, + node.nodeName) + for attr in node.attributes.values(): + clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) + a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) + a.specified = attr.specified + + if deep: + for child in node.childNodes: + c = _clone_node(child, deep, newOwnerDocument) + clone.appendChild(c) + + elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: + clone = newOwnerDocument.createDocumentFragment() + if deep: + for child in node.childNodes: + c = _clone_node(child, deep, newOwnerDocument) + clone.appendChild(c) + + elif node.nodeType == Node.TEXT_NODE: + clone = newOwnerDocument.createTextNode(node.data) + elif node.nodeType == Node.CDATA_SECTION_NODE: + clone = newOwnerDocument.createCDATASection(node.data) + elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: + clone = newOwnerDocument.createProcessingInstruction(node.target, + node.data) + elif node.nodeType == Node.COMMENT_NODE: + clone = newOwnerDocument.createComment(node.data) + elif node.nodeType == Node.ATTRIBUTE_NODE: + clone = newOwnerDocument.createAttributeNS(node.namespaceURI, + node.nodeName) + clone.specified = True + clone.value = node.value + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + assert node.ownerDocument is not newOwnerDocument + operation = xmlcore.dom.UserDataHandler.NODE_IMPORTED + clone = newOwnerDocument.implementation.createDocumentType( + node.name, node.publicId, node.systemId) + clone.ownerDocument = newOwnerDocument + if deep: + clone.entities._seq = [] + clone.notations._seq = [] + for n in node.notations._seq: + notation = Notation(n.nodeName, n.publicId, n.systemId) + notation.ownerDocument = newOwnerDocument + clone.notations._seq.append(notation) + if hasattr(n, '_call_user_data_handler'): + n._call_user_data_handler(operation, n, notation) + for e in node.entities._seq: + entity = Entity(e.nodeName, e.publicId, e.systemId, + e.notationName) + entity.actualEncoding = e.actualEncoding + entity.encoding = e.encoding + entity.version = e.version + entity.ownerDocument = newOwnerDocument + clone.entities._seq.append(entity) + if hasattr(e, '_call_user_data_handler'): + e._call_user_data_handler(operation, n, entity) + else: + # Note the cloning of Document and DocumentType nodes is + # implemenetation specific. minidom handles those cases + # directly in the cloneNode() methods. + raise xmlcore.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) + + # Check for _call_user_data_handler() since this could conceivably + # used with other DOM implementations (one of the FourThought + # DOMs, perhaps?). + if hasattr(node, '_call_user_data_handler'): + node._call_user_data_handler(operation, node, clone) + return clone + + +def _nssplit(qualifiedName): + fields = qualifiedName.split(':', 1) + if len(fields) == 2: + return fields + else: + return (None, fields[0]) + + +def _get_StringIO(): + # we can't use cStringIO since it doesn't support Unicode strings + from StringIO import StringIO + return StringIO() + +def _do_pulldom_parse(func, args, kwargs): + events = func(*args, **kwargs) + toktype, rootNode = events.getEvent() + events.expandNode(rootNode) + events.clear() + return rootNode + +def parse(file, parser=None, bufsize=None): + """Parse a file into a DOM by filename or file object.""" + if parser is None and not bufsize: + from xmlcore.dom import expatbuilder + return expatbuilder.parse(file) + else: + from xmlcore.dom import pulldom + return _do_pulldom_parse(pulldom.parse, (file,), + {'parser': parser, 'bufsize': bufsize}) + +def parseString(string, parser=None): + """Parse a file into a DOM from a string.""" + if parser is None: + from xmlcore.dom import expatbuilder + return expatbuilder.parseString(string) + else: + from xmlcore.dom import pulldom + return _do_pulldom_parse(pulldom.parseString, (string,), + {'parser': parser}) + +def getDOMImplementation(features=None): + if features: + if isinstance(features, StringTypes): + features = domreg._parse_feature_string(features) + for f, v in features: + if not Document.implementation.hasFeature(f, v): + return None + return Document.implementation diff --git a/Lib/xmlcore/dom/pulldom.py b/Lib/xmlcore/dom/pulldom.py new file mode 100644 index 0000000..dad3718 --- /dev/null +++ b/Lib/xmlcore/dom/pulldom.py @@ -0,0 +1,351 @@ +import xmlcore.sax +import xmlcore.sax.handler +import types + +try: + _StringTypes = [types.StringType, types.UnicodeType] +except AttributeError: + _StringTypes = [types.StringType] + +START_ELEMENT = "START_ELEMENT" +END_ELEMENT = "END_ELEMENT" +COMMENT = "COMMENT" +START_DOCUMENT = "START_DOCUMENT" +END_DOCUMENT = "END_DOCUMENT" +PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" +IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" +CHARACTERS = "CHARACTERS" + +class PullDOM(xmlcore.sax.ContentHandler): + _locator = None + document = None + + def __init__(self, documentFactory=None): + from xmlcore.dom import XML_NAMESPACE + self.documentFactory = documentFactory + self.firstEvent = [None, None] + self.lastEvent = self.firstEvent + self.elementStack = [] + self.push = self.elementStack.append + try: + self.pop = self.elementStack.pop + except AttributeError: + # use class' pop instead + pass + self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + self.pending_events = [] + + def pop(self): + result = self.elementStack[-1] + del self.elementStack[-1] + return result + + def setDocumentLocator(self, locator): + self._locator = locator + + def startPrefixMapping(self, prefix, uri): + if not hasattr(self, '_xmlns_attrs'): + self._xmlns_attrs = [] + self._xmlns_attrs.append((prefix or 'xmlns', uri)) + self._ns_contexts.append(self._current_context.copy()) + self._current_context[uri] = prefix or None + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts.pop() + + def startElementNS(self, name, tagName , attrs): + # Retrieve xml namespace declaration attributes. + xmlns_uri = 'http://www.w3.org/2000/xmlns/' + xmlns_attrs = getattr(self, '_xmlns_attrs', None) + if xmlns_attrs is not None: + for aname, value in xmlns_attrs: + attrs._attrs[(xmlns_uri, aname)] = value + self._xmlns_attrs = [] + uri, localname = name + if uri: + # When using namespaces, the reader may or may not + # provide us with the original name. If not, create + # *a* valid tagName from the current context. + if tagName is None: + prefix = self._current_context[uri] + if prefix: + tagName = prefix + ":" + localname + else: + tagName = localname + if self.document: + node = self.document.createElementNS(uri, tagName) + else: + node = self.buildDocument(uri, tagName) + else: + # When the tagname is not prefixed, it just appears as + # localname + if self.document: + node = self.document.createElement(localname) + else: + node = self.buildDocument(None, localname) + + for aname,value in attrs.items(): + a_uri, a_localname = aname + if a_uri == xmlns_uri: + if a_localname == 'xmlns': + qname = a_localname + else: + qname = 'xmlns:' + a_localname + attr = self.document.createAttributeNS(a_uri, qname) + node.setAttributeNodeNS(attr) + elif a_uri: + prefix = self._current_context[a_uri] + if prefix: + qname = prefix + ":" + a_localname + else: + qname = a_localname + attr = self.document.createAttributeNS(a_uri, qname) + node.setAttributeNodeNS(attr) + else: + attr = self.document.createAttribute(a_localname) + node.setAttributeNode(attr) + attr.value = value + + self.lastEvent[1] = [(START_ELEMENT, node), None] + self.lastEvent = self.lastEvent[1] + self.push(node) + + def endElementNS(self, name, tagName): + self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] + self.lastEvent = self.lastEvent[1] + + def startElement(self, name, attrs): + if self.document: + node = self.document.createElement(name) + else: + node = self.buildDocument(None, name) + + for aname,value in attrs.items(): + attr = self.document.createAttribute(aname) + attr.value = value + node.setAttributeNode(attr) + + self.lastEvent[1] = [(START_ELEMENT, node), None] + self.lastEvent = self.lastEvent[1] + self.push(node) + + def endElement(self, name): + self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] + self.lastEvent = self.lastEvent[1] + + def comment(self, s): + if self.document: + node = self.document.createComment(s) + self.lastEvent[1] = [(COMMENT, node), None] + self.lastEvent = self.lastEvent[1] + else: + event = [(COMMENT, s), None] + self.pending_events.append(event) + + def processingInstruction(self, target, data): + if self.document: + node = self.document.createProcessingInstruction(target, data) + self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] + self.lastEvent = self.lastEvent[1] + else: + event = [(PROCESSING_INSTRUCTION, target, data), None] + self.pending_events.append(event) + + def ignorableWhitespace(self, chars): + node = self.document.createTextNode(chars) + self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] + self.lastEvent = self.lastEvent[1] + + def characters(self, chars): + node = self.document.createTextNode(chars) + self.lastEvent[1] = [(CHARACTERS, node), None] + self.lastEvent = self.lastEvent[1] + + def startDocument(self): + if self.documentFactory is None: + import xmlcore.dom.minidom + self.documentFactory = xmlcore.dom.minidom.Document.implementation + + def buildDocument(self, uri, tagname): + # Can't do that in startDocument, since we need the tagname + # XXX: obtain DocumentType + node = self.documentFactory.createDocument(uri, tagname, None) + self.document = node + self.lastEvent[1] = [(START_DOCUMENT, node), None] + self.lastEvent = self.lastEvent[1] + self.push(node) + # Put everything we have seen so far into the document + for e in self.pending_events: + if e[0][0] == PROCESSING_INSTRUCTION: + _,target,data = e[0] + n = self.document.createProcessingInstruction(target, data) + e[0] = (PROCESSING_INSTRUCTION, n) + elif e[0][0] == COMMENT: + n = self.document.createComment(e[0][1]) + e[0] = (COMMENT, n) + else: + raise AssertionError("Unknown pending event ",e[0][0]) + self.lastEvent[1] = e + self.lastEvent = e + self.pending_events = None + return node.firstChild + + def endDocument(self): + self.lastEvent[1] = [(END_DOCUMENT, self.document), None] + self.pop() + + def clear(self): + "clear(): Explicitly release parsing structures" + self.document = None + +class ErrorHandler: + def warning(self, exception): + print exception + def error(self, exception): + raise exception + def fatalError(self, exception): + raise exception + +class DOMEventStream: + def __init__(self, stream, parser, bufsize): + self.stream = stream + self.parser = parser + self.bufsize = bufsize + if not hasattr(self.parser, 'feed'): + self.getEvent = self._slurp + self.reset() + + def reset(self): + self.pulldom = PullDOM() + # This content handler relies on namespace support + self.parser.setFeature(xmlcore.sax.handler.feature_namespaces, 1) + self.parser.setContentHandler(self.pulldom) + + def __getitem__(self, pos): + rc = self.getEvent() + if rc: + return rc + raise IndexError + + def next(self): + rc = self.getEvent() + if rc: + return rc + raise StopIteration + + def __iter__(self): + return self + + def expandNode(self, node): + event = self.getEvent() + parents = [node] + while event: + token, cur_node = event + if cur_node is node: + return + if token != END_ELEMENT: + parents[-1].appendChild(cur_node) + if token == START_ELEMENT: + parents.append(cur_node) + elif token == END_ELEMENT: + del parents[-1] + event = self.getEvent() + + def getEvent(self): + # use IncrementalParser interface, so we get the desired + # pull effect + if not self.pulldom.firstEvent[1]: + self.pulldom.lastEvent = self.pulldom.firstEvent + while not self.pulldom.firstEvent[1]: + buf = self.stream.read(self.bufsize) + if not buf: + self.parser.close() + return None + self.parser.feed(buf) + rc = self.pulldom.firstEvent[1][0] + self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] + return rc + + def _slurp(self): + """ Fallback replacement for getEvent() using the + standard SAX2 interface, which means we slurp the + SAX events into memory (no performance gain, but + we are compatible to all SAX parsers). + """ + self.parser.parse(self.stream) + self.getEvent = self._emit + return self._emit() + + def _emit(self): + """ Fallback replacement for getEvent() that emits + the events that _slurp() read previously. + """ + rc = self.pulldom.firstEvent[1][0] + self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] + return rc + + def clear(self): + """clear(): Explicitly release parsing objects""" + self.pulldom.clear() + del self.pulldom + self.parser = None + self.stream = None + +class SAX2DOM(PullDOM): + + def startElementNS(self, name, tagName , attrs): + PullDOM.startElementNS(self, name, tagName, attrs) + curNode = self.elementStack[-1] + parentNode = self.elementStack[-2] + parentNode.appendChild(curNode) + + def startElement(self, name, attrs): + PullDOM.startElement(self, name, attrs) + curNode = self.elementStack[-1] + parentNode = self.elementStack[-2] + parentNode.appendChild(curNode) + + def processingInstruction(self, target, data): + PullDOM.processingInstruction(self, target, data) + node = self.lastEvent[0][1] + parentNode = self.elementStack[-1] + parentNode.appendChild(node) + + def ignorableWhitespace(self, chars): + PullDOM.ignorableWhitespace(self, chars) + node = self.lastEvent[0][1] + parentNode = self.elementStack[-1] + parentNode.appendChild(node) + + def characters(self, chars): + PullDOM.characters(self, chars) + node = self.lastEvent[0][1] + parentNode = self.elementStack[-1] + parentNode.appendChild(node) + + +default_bufsize = (2 ** 14) - 20 + +def parse(stream_or_string, parser=None, bufsize=None): + if bufsize is None: + bufsize = default_bufsize + if type(stream_or_string) in _StringTypes: + stream = open(stream_or_string) + else: + stream = stream_or_string + if not parser: + parser = xmlcore.sax.make_parser() + return DOMEventStream(stream, parser, bufsize) + +def parseString(string, parser=None): + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + + bufsize = len(string) + buf = StringIO(string) + if not parser: + parser = xmlcore.sax.make_parser() + return DOMEventStream(buf, parser, bufsize) diff --git a/Lib/xmlcore/dom/xmlbuilder.py b/Lib/xmlcore/dom/xmlbuilder.py new file mode 100644 index 0000000..d58c723 --- /dev/null +++ b/Lib/xmlcore/dom/xmlbuilder.py @@ -0,0 +1,388 @@ +"""Implementation of the DOM Level 3 'LS-Load' feature.""" + +import copy +import xmlcore.dom + +from xmlcore.dom.minicompat import * + +from xmlcore.dom.NodeFilter import NodeFilter + + +__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] + + +class Options: + """Features object that has variables set for each DOMBuilder feature. + + The DOMBuilder class uses an instance of this class to pass settings to + the ExpatBuilder class. + """ + + # Note that the DOMBuilder class in LoadSave constrains which of these + # values can be set using the DOM Level 3 LoadSave feature. + + namespaces = 1 + namespace_declarations = True + validation = False + external_parameter_entities = True + external_general_entities = True + external_dtd_subset = True + validate_if_schema = False + validate = False + datatype_normalization = False + create_entity_ref_nodes = True + entities = True + whitespace_in_element_content = True + cdata_sections = True + comments = True + charset_overrides_xml_encoding = True + infoset = False + supported_mediatypes_only = False + + errorHandler = None + filter = None + + +class DOMBuilder: + entityResolver = None + errorHandler = None + filter = None + + ACTION_REPLACE = 1 + ACTION_APPEND_AS_CHILDREN = 2 + ACTION_INSERT_AFTER = 3 + ACTION_INSERT_BEFORE = 4 + + _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, + ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) + + def __init__(self): + self._options = Options() + + def _get_entityResolver(self): + return self.entityResolver + def _set_entityResolver(self, entityResolver): + self.entityResolver = entityResolver + + def _get_errorHandler(self): + return self.errorHandler + def _set_errorHandler(self, errorHandler): + self.errorHandler = errorHandler + + def _get_filter(self): + return self.filter + def _set_filter(self, filter): + self.filter = filter + + def setFeature(self, name, state): + if self.supportsFeature(name): + state = state and 1 or 0 + try: + settings = self._settings[(_name_xform(name), state)] + except KeyError: + raise xmlcore.dom.NotSupportedErr( + "unsupported feature: %r" % (name,)) + else: + for name, value in settings: + setattr(self._options, name, value) + else: + raise xmlcore.dom.NotFoundErr("unknown feature: " + repr(name)) + + def supportsFeature(self, name): + return hasattr(self._options, _name_xform(name)) + + def canSetFeature(self, name, state): + key = (_name_xform(name), state and 1 or 0) + return self._settings.has_key(key) + + # This dictionary maps from (feature,value) to a list of + # (option,value) pairs that should be set on the Options object. + # If a (feature,value) setting is not in this dictionary, it is + # not supported by the DOMBuilder. + # + _settings = { + ("namespace_declarations", 0): [ + ("namespace_declarations", 0)], + ("namespace_declarations", 1): [ + ("namespace_declarations", 1)], + ("validation", 0): [ + ("validation", 0)], + ("external_general_entities", 0): [ + ("external_general_entities", 0)], + ("external_general_entities", 1): [ + ("external_general_entities", 1)], + ("external_parameter_entities", 0): [ + ("external_parameter_entities", 0)], + ("external_parameter_entities", 1): [ + ("external_parameter_entities", 1)], + ("validate_if_schema", 0): [ + ("validate_if_schema", 0)], + ("create_entity_ref_nodes", 0): [ + ("create_entity_ref_nodes", 0)], + ("create_entity_ref_nodes", 1): [ + ("create_entity_ref_nodes", 1)], + ("entities", 0): [ + ("create_entity_ref_nodes", 0), + ("entities", 0)], + ("entities", 1): [ + ("entities", 1)], + ("whitespace_in_element_content", 0): [ + ("whitespace_in_element_content", 0)], + ("whitespace_in_element_content", 1): [ + ("whitespace_in_element_content", 1)], + ("cdata_sections", 0): [ + ("cdata_sections", 0)], + ("cdata_sections", 1): [ + ("cdata_sections", 1)], + ("comments", 0): [ + ("comments", 0)], + ("comments", 1): [ + ("comments", 1)], + ("charset_overrides_xml_encoding", 0): [ + ("charset_overrides_xml_encoding", 0)], + ("charset_overrides_xml_encoding", 1): [ + ("charset_overrides_xml_encoding", 1)], + ("infoset", 0): [], + ("infoset", 1): [ + ("namespace_declarations", 0), + ("validate_if_schema", 0), + ("create_entity_ref_nodes", 0), + ("entities", 0), + ("cdata_sections", 0), + ("datatype_normalization", 1), + ("whitespace_in_element_content", 1), + ("comments", 1), + ("charset_overrides_xml_encoding", 1)], + ("supported_mediatypes_only", 0): [ + ("supported_mediatypes_only", 0)], + ("namespaces", 0): [ + ("namespaces", 0)], + ("namespaces", 1): [ + ("namespaces", 1)], + } + + def getFeature(self, name): + xname = _name_xform(name) + try: + return getattr(self._options, xname) + except AttributeError: + if name == "infoset": + options = self._options + return (options.datatype_normalization + and options.whitespace_in_element_content + and options.comments + and options.charset_overrides_xml_encoding + and not (options.namespace_declarations + or options.validate_if_schema + or options.create_entity_ref_nodes + or options.entities + or options.cdata_sections)) + raise xmlcore.dom.NotFoundErr("feature %s not known" % repr(name)) + + def parseURI(self, uri): + if self.entityResolver: + input = self.entityResolver.resolveEntity(None, uri) + else: + input = DOMEntityResolver().resolveEntity(None, uri) + return self.parse(input) + + def parse(self, input): + options = copy.copy(self._options) + options.filter = self.filter + options.errorHandler = self.errorHandler + fp = input.byteStream + if fp is None and options.systemId: + import urllib2 + fp = urllib2.urlopen(input.systemId) + return self._parse_bytestream(fp, options) + + def parseWithContext(self, input, cnode, action): + if action not in self._legal_actions: + raise ValueError("not a legal action") + raise NotImplementedError("Haven't written this yet...") + + def _parse_bytestream(self, stream, options): + import xmlcore.dom.expatbuilder + builder = xmlcore.dom.expatbuilder.makeBuilder(options) + return builder.parseFile(stream) + + +def _name_xform(name): + return name.lower().replace('-', '_') + + +class DOMEntityResolver(NewStyle): + __slots__ = '_opener', + + def resolveEntity(self, publicId, systemId): + assert systemId is not None + source = DOMInputSource() + source.publicId = publicId + source.systemId = systemId + source.byteStream = self._get_opener().open(systemId) + + # determine the encoding if the transport provided it + source.encoding = self._guess_media_encoding(source) + + # determine the base URI is we can + import posixpath, urlparse + parts = urlparse.urlparse(systemId) + scheme, netloc, path, params, query, fragment = parts + # XXX should we check the scheme here as well? + if path and not path.endswith("/"): + path = posixpath.dirname(path) + "/" + parts = scheme, netloc, path, params, query, fragment + source.baseURI = urlparse.urlunparse(parts) + + return source + + def _get_opener(self): + try: + return self._opener + except AttributeError: + self._opener = self._create_opener() + return self._opener + + def _create_opener(self): + import urllib2 + return urllib2.build_opener() + + def _guess_media_encoding(self, source): + info = source.byteStream.info() + if info.has_key("Content-Type"): + for param in info.getplist(): + if param.startswith("charset="): + return param.split("=", 1)[1].lower() + + +class DOMInputSource(NewStyle): + __slots__ = ('byteStream', 'characterStream', 'stringData', + 'encoding', 'publicId', 'systemId', 'baseURI') + + def __init__(self): + self.byteStream = None + self.characterStream = None + self.stringData = None + self.encoding = None + self.publicId = None + self.systemId = None + self.baseURI = None + + def _get_byteStream(self): + return self.byteStream + def _set_byteStream(self, byteStream): + self.byteStream = byteStream + + def _get_characterStream(self): + return self.characterStream + def _set_characterStream(self, characterStream): + self.characterStream = characterStream + + def _get_stringData(self): + return self.stringData + def _set_stringData(self, data): + self.stringData = data + + def _get_encoding(self): + return self.encoding + def _set_encoding(self, encoding): + self.encoding = encoding + + def _get_publicId(self): + return self.publicId + def _set_publicId(self, publicId): + self.publicId = publicId + + def _get_systemId(self): + return self.systemId + def _set_systemId(self, systemId): + self.systemId = systemId + + def _get_baseURI(self): + return self.baseURI + def _set_baseURI(self, uri): + self.baseURI = uri + + +class DOMBuilderFilter: + """Element filter which can be used to tailor construction of + a DOM instance. + """ + + # There's really no need for this class; concrete implementations + # should just implement the endElement() and startElement() + # methods as appropriate. Using this makes it easy to only + # implement one of them. + + FILTER_ACCEPT = 1 + FILTER_REJECT = 2 + FILTER_SKIP = 3 + FILTER_INTERRUPT = 4 + + whatToShow = NodeFilter.SHOW_ALL + + def _get_whatToShow(self): + return self.whatToShow + + def acceptNode(self, element): + return self.FILTER_ACCEPT + + def startContainer(self, element): + return self.FILTER_ACCEPT + +del NodeFilter + + +class DocumentLS: + """Mixin to create documents that conform to the load/save spec.""" + + async = False + + def _get_async(self): + return False + def _set_async(self, async): + if async: + raise xmlcore.dom.NotSupportedErr( + "asynchronous document loading is not supported") + + def abort(self): + # What does it mean to "clear" a document? Does the + # documentElement disappear? + raise NotImplementedError( + "haven't figured out what this means yet") + + def load(self, uri): + raise NotImplementedError("haven't written this yet") + + def loadXML(self, source): + raise NotImplementedError("haven't written this yet") + + def saveXML(self, snode): + if snode is None: + snode = self + elif snode.ownerDocument is not self: + raise xmlcore.dom.WrongDocumentErr() + return snode.toxml() + + +class DOMImplementationLS: + MODE_SYNCHRONOUS = 1 + MODE_ASYNCHRONOUS = 2 + + def createDOMBuilder(self, mode, schemaType): + if schemaType is not None: + raise xmlcore.dom.NotSupportedErr( + "schemaType not yet supported") + if mode == self.MODE_SYNCHRONOUS: + return DOMBuilder() + if mode == self.MODE_ASYNCHRONOUS: + raise xmlcore.dom.NotSupportedErr( + "asynchronous builders are not supported") + raise ValueError("unknown value for mode") + + def createDOMWriter(self): + raise NotImplementedError( + "the writer interface hasn't been written yet!") + + def createDOMInputSource(self): + return DOMInputSource() diff --git a/Lib/xmlcore/etree/ElementInclude.py b/Lib/xmlcore/etree/ElementInclude.py new file mode 100644 index 0000000..5ee199a --- /dev/null +++ b/Lib/xmlcore/etree/ElementInclude.py @@ -0,0 +1,141 @@ +# +# ElementTree +# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ +# +# limited xinclude support for element trees +# +# history: +# 2003-08-15 fl created +# 2003-11-14 fl fixed default loader +# +# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +## +# Limited XInclude support for the ElementTree package. +## + +import copy +import ElementTree + +XINCLUDE = "{http://www.w3.org/2001/XInclude}" + +XINCLUDE_INCLUDE = XINCLUDE + "include" +XINCLUDE_FALLBACK = XINCLUDE + "fallback" + +## +# Fatal include error. + +class FatalIncludeError(SyntaxError): + pass + +## +# Default loader. This loader reads an included resource from disk. +# +# @param href Resource reference. +# @param parse Parse mode. Either "xml" or "text". +# @param encoding Optional text encoding. +# @return The expanded resource. If the parse mode is "xml", this +# is an ElementTree instance. If the parse mode is "text", this +# is a Unicode string. If the loader fails, it can return None +# or raise an IOError exception. +# @throws IOError If the loader fails to load the resource. + +def default_loader(href, parse, encoding=None): + file = open(href) + if parse == "xml": + data = ElementTree.parse(file).getroot() + else: + data = file.read() + if encoding: + data = data.decode(encoding) + file.close() + return data + +## +# Expand XInclude directives. +# +# @param elem Root element. +# @param loader Optional resource loader. If omitted, it defaults +# to {@link default_loader}. If given, it should be a callable +# that implements the same interface as default_loader. +# @throws FatalIncludeError If the function fails to include a given +# resource, or if the tree contains malformed XInclude elements. +# @throws IOError If the function fails to load a given resource. + +def include(elem, loader=None): + if loader is None: + loader = default_loader + # look for xinclude elements + i = 0 + while i < len(elem): + e = elem[i] + if e.tag == XINCLUDE_INCLUDE: + # process xinclude directive + href = e.get("href") + parse = e.get("parse", "xml") + if parse == "xml": + node = loader(href, parse) + if node is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + node = copy.copy(node) + if e.tail: + node.tail = (node.tail or "") + e.tail + elem[i] = node + elif parse == "text": + text = loader(href, parse, e.get("encoding")) + if text is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + if i: + node = elem[i-1] + node.tail = (node.tail or "") + text + else: + elem.text = (elem.text or "") + text + (e.tail or "") + del elem[i] + continue + else: + raise FatalIncludeError( + "unknown parse type in xi:include tag (%r)" % parse + ) + elif e.tag == XINCLUDE_FALLBACK: + raise FatalIncludeError( + "xi:fallback tag must be child of xi:include (%r)" % e.tag + ) + else: + include(e, loader) + i = i + 1 + diff --git a/Lib/xmlcore/etree/ElementPath.py b/Lib/xmlcore/etree/ElementPath.py new file mode 100644 index 0000000..e8093c6 --- /dev/null +++ b/Lib/xmlcore/etree/ElementPath.py @@ -0,0 +1,196 @@ +# +# ElementTree +# $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $ +# +# limited xpath support for element trees +# +# history: +# 2003-05-23 fl created +# 2003-05-28 fl added support for // etc +# 2003-08-27 fl fixed parsing of periods in element names +# +# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +## +# Implementation module for XPath support. There's usually no reason +# to import this module directly; the ElementTree does this for +# you, if needed. +## + +import re + +xpath_tokenizer = re.compile( + "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" + ).findall + +class xpath_descendant_or_self: + pass + +## +# Wrapper for a compiled XPath. + +class Path: + + ## + # Create an Path instance from an XPath expression. + + def __init__(self, path): + tokens = xpath_tokenizer(path) + # the current version supports 'path/path'-style expressions only + self.path = [] + self.tag = None + if tokens and tokens[0][0] == "/": + raise SyntaxError("cannot use absolute path on element") + while tokens: + op, tag = tokens.pop(0) + if tag or op == "*": + self.path.append(tag or op) + elif op == ".": + pass + elif op == "/": + self.path.append(xpath_descendant_or_self()) + continue + else: + raise SyntaxError("unsupported path syntax (%s)" % op) + if tokens: + op, tag = tokens.pop(0) + if op != "/": + raise SyntaxError( + "expected path separator (%s)" % (op or tag) + ) + if self.path and isinstance(self.path[-1], xpath_descendant_or_self): + raise SyntaxError("path cannot end with //") + if len(self.path) == 1 and isinstance(self.path[0], type("")): + self.tag = self.path[0] + + ## + # Find first matching object. + + def find(self, element): + tag = self.tag + if tag is None: + nodeset = self.findall(element) + if not nodeset: + return None + return nodeset[0] + for elem in element: + if elem.tag == tag: + return elem + return None + + ## + # Find text for first matching object. + + def findtext(self, element, default=None): + tag = self.tag + if tag is None: + nodeset = self.findall(element) + if not nodeset: + return default + return nodeset[0].text or "" + for elem in element: + if elem.tag == tag: + return elem.text or "" + return default + + ## + # Find all matching objects. + + def findall(self, element): + nodeset = [element] + index = 0 + while 1: + try: + path = self.path[index] + index = index + 1 + except IndexError: + return nodeset + set = [] + if isinstance(path, xpath_descendant_or_self): + try: + tag = self.path[index] + if not isinstance(tag, type("")): + tag = None + else: + index = index + 1 + except IndexError: + tag = None # invalid path + for node in nodeset: + new = list(node.getiterator(tag)) + if new and new[0] is node: + set.extend(new[1:]) + else: + set.extend(new) + else: + for node in nodeset: + for node in node: + if path == "*" or node.tag == path: + set.append(node) + if not set: + return [] + nodeset = set + +_cache = {} + +## +# (Internal) Compile path. + +def _compile(path): + p = _cache.get(path) + if p is not None: + return p + p = Path(path) + if len(_cache) >= 100: + _cache.clear() + _cache[path] = p + return p + +## +# Find first matching object. + +def find(element, path): + return _compile(path).find(element) + +## +# Find text for first matching object. + +def findtext(element, path, default=None): + return _compile(path).findtext(element, default) + +## +# Find all matching objects. + +def findall(element, path): + return _compile(path).findall(element) + diff --git a/Lib/xmlcore/etree/ElementTree.py b/Lib/xmlcore/etree/ElementTree.py new file mode 100644 index 0000000..c18f7ff --- /dev/null +++ b/Lib/xmlcore/etree/ElementTree.py @@ -0,0 +1,1254 @@ +# +# ElementTree +# $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $ +# +# light-weight XML support for Python 1.5.2 and later. +# +# history: +# 2001-10-20 fl created (from various sources) +# 2001-11-01 fl return root from parse method +# 2002-02-16 fl sort attributes in lexical order +# 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup +# 2002-05-01 fl finished TreeBuilder refactoring +# 2002-07-14 fl added basic namespace support to ElementTree.write +# 2002-07-25 fl added QName attribute support +# 2002-10-20 fl fixed encoding in write +# 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding +# 2002-11-27 fl accept file objects or file names for parse/write +# 2002-12-04 fl moved XMLTreeBuilder back to this module +# 2003-01-11 fl fixed entity encoding glitch for us-ascii +# 2003-02-13 fl added XML literal factory +# 2003-02-21 fl added ProcessingInstruction/PI factory +# 2003-05-11 fl added tostring/fromstring helpers +# 2003-05-26 fl added ElementPath support +# 2003-07-05 fl added makeelement factory method +# 2003-07-28 fl added more well-known namespace prefixes +# 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch) +# 2003-09-04 fl fall back on emulator if ElementPath is not installed +# 2003-10-31 fl markup updates +# 2003-11-15 fl fixed nested namespace bug +# 2004-03-28 fl added XMLID helper +# 2004-06-02 fl added default support to findtext +# 2004-06-08 fl fixed encoding of non-ascii element/attribute names +# 2004-08-23 fl take advantage of post-2.1 expat features +# 2005-02-01 fl added iterparse implementation +# 2005-03-02 fl fixed iterparse support for pre-2.2 versions +# +# Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2005 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +__all__ = [ + # public symbols + "Comment", + "dump", + "Element", "ElementTree", + "fromstring", + "iselement", "iterparse", + "parse", + "PI", "ProcessingInstruction", + "QName", + "SubElement", + "tostring", + "TreeBuilder", + "VERSION", "XML", + "XMLTreeBuilder", + ] + +## +# The Element type is a flexible container object, designed to +# store hierarchical data structures in memory. The type can be +# described as a cross between a list and a dictionary. +#

+# Each element has a number of properties associated with it: +#

    +#
  • a tag. This is a string identifying what kind of data +# this element represents (the element type, in other words).
  • +#
  • a number of attributes, stored in a Python dictionary.
  • +#
  • a text string.
  • +#
  • an optional tail string.
  • +#
  • a number of child elements, stored in a Python sequence
  • +#
+# +# To create an element instance, use the {@link #Element} or {@link +# #SubElement} factory functions. +#

+# The {@link #ElementTree} class can be used to wrap an element +# structure, and convert it from and to XML. +## + +import string, sys, re + +class _SimpleElementPath: + # emulate pre-1.2 find/findtext/findall behaviour + def find(self, element, tag): + for elem in element: + if elem.tag == tag: + return elem + return None + def findtext(self, element, tag, default=None): + for elem in element: + if elem.tag == tag: + return elem.text or "" + return default + def findall(self, element, tag): + if tag[:3] == ".//": + return element.getiterator(tag[3:]) + result = [] + for elem in element: + if elem.tag == tag: + result.append(elem) + return result + +try: + import ElementPath +except ImportError: + # FIXME: issue warning in this case? + ElementPath = _SimpleElementPath() + +# TODO: add support for custom namespace resolvers/default namespaces +# TODO: add improved support for incremental parsing + +VERSION = "1.2.6" + +## +# Internal element class. This class defines the Element interface, +# and provides a reference implementation of this interface. +#

+# You should not create instances of this class directly. Use the +# appropriate factory functions instead, such as {@link #Element} +# and {@link #SubElement}. +# +# @see Element +# @see SubElement +# @see Comment +# @see ProcessingInstruction + +class _ElementInterface: + # text...tail + + ## + # (Attribute) Element tag. + + tag = None + + ## + # (Attribute) Element attribute dictionary. Where possible, use + # {@link #_ElementInterface.get}, + # {@link #_ElementInterface.set}, + # {@link #_ElementInterface.keys}, and + # {@link #_ElementInterface.items} to access + # element attributes. + + attrib = None + + ## + # (Attribute) Text before first subelement. This is either a + # string or the value None, if there was no text. + + text = None + + ## + # (Attribute) Text after this element's end tag, but before the + # next sibling element's start tag. This is either a string or + # the value None, if there was no text. + + tail = None # text after end tag, if any + + def __init__(self, tag, attrib): + self.tag = tag + self.attrib = attrib + self._children = [] + + def __repr__(self): + return "" % (self.tag, id(self)) + + ## + # Creates a new element object of the same type as this element. + # + # @param tag Element tag. + # @param attrib Element attributes, given as a dictionary. + # @return A new element instance. + + def makeelement(self, tag, attrib): + return Element(tag, attrib) + + ## + # Returns the number of subelements. + # + # @return The number of subelements. + + def __len__(self): + return len(self._children) + + ## + # Returns the given subelement. + # + # @param index What subelement to return. + # @return The given subelement. + # @exception IndexError If the given element does not exist. + + def __getitem__(self, index): + return self._children[index] + + ## + # Replaces the given subelement. + # + # @param index What subelement to replace. + # @param element The new element value. + # @exception IndexError If the given element does not exist. + # @exception AssertionError If element is not a valid object. + + def __setitem__(self, index, element): + assert iselement(element) + self._children[index] = element + + ## + # Deletes the given subelement. + # + # @param index What subelement to delete. + # @exception IndexError If the given element does not exist. + + def __delitem__(self, index): + del self._children[index] + + ## + # Returns a list containing subelements in the given range. + # + # @param start The first subelement to return. + # @param stop The first subelement that shouldn't be returned. + # @return A sequence object containing subelements. + + def __getslice__(self, start, stop): + return self._children[start:stop] + + ## + # Replaces a number of subelements with elements from a sequence. + # + # @param start The first subelement to replace. + # @param stop The first subelement that shouldn't be replaced. + # @param elements A sequence object with zero or more elements. + # @exception AssertionError If a sequence member is not a valid object. + + def __setslice__(self, start, stop, elements): + for element in elements: + assert iselement(element) + self._children[start:stop] = list(elements) + + ## + # Deletes a number of subelements. + # + # @param start The first subelement to delete. + # @param stop The first subelement to leave in there. + + def __delslice__(self, start, stop): + del self._children[start:stop] + + ## + # Adds a subelement to the end of this element. + # + # @param element The element to add. + # @exception AssertionError If a sequence member is not a valid object. + + def append(self, element): + assert iselement(element) + self._children.append(element) + + ## + # Inserts a subelement at the given position in this element. + # + # @param index Where to insert the new subelement. + # @exception AssertionError If the element is not a valid object. + + def insert(self, index, element): + assert iselement(element) + self._children.insert(index, element) + + ## + # Removes a matching subelement. Unlike the find methods, + # this method compares elements based on identity, not on tag + # value or contents. + # + # @param element What element to remove. + # @exception ValueError If a matching element could not be found. + # @exception AssertionError If the element is not a valid object. + + def remove(self, element): + assert iselement(element) + self._children.remove(element) + + ## + # Returns all subelements. The elements are returned in document + # order. + # + # @return A list of subelements. + # @defreturn list of Element instances + + def getchildren(self): + return self._children + + ## + # Finds the first matching subelement, by tag name or path. + # + # @param path What element to look for. + # @return The first matching element, or None if no element was found. + # @defreturn Element or None + + def find(self, path): + return ElementPath.find(self, path) + + ## + # Finds text for the first matching subelement, by tag name or path. + # + # @param path What element to look for. + # @param default What to return if the element was not found. + # @return The text content of the first matching element, or the + # default value no element was found. Note that if the element + # has is found, but has no text content, this method returns an + # empty string. + # @defreturn string + + def findtext(self, path, default=None): + return ElementPath.findtext(self, path, default) + + ## + # Finds all matching subelements, by tag name or path. + # + # @param path What element to look for. + # @return A list or iterator containing all matching elements, + # in document order. + # @defreturn list of Element instances + + def findall(self, path): + return ElementPath.findall(self, path) + + ## + # Resets an element. This function removes all subelements, clears + # all attributes, and sets the text and tail attributes to None. + + def clear(self): + self.attrib.clear() + self._children = [] + self.text = self.tail = None + + ## + # Gets an element attribute. + # + # @param key What attribute to look for. + # @param default What to return if the attribute was not found. + # @return The attribute value, or the default value, if the + # attribute was not found. + # @defreturn string or None + + def get(self, key, default=None): + return self.attrib.get(key, default) + + ## + # Sets an element attribute. + # + # @param key What attribute to set. + # @param value The attribute value. + + def set(self, key, value): + self.attrib[key] = value + + ## + # Gets a list of attribute names. The names are returned in an + # arbitrary order (just like for an ordinary Python dictionary). + # + # @return A list of element attribute names. + # @defreturn list of strings + + def keys(self): + return self.attrib.keys() + + ## + # Gets element attributes, as a sequence. The attributes are + # returned in an arbitrary order. + # + # @return A list of (name, value) tuples for all attributes. + # @defreturn list of (string, string) tuples + + def items(self): + return self.attrib.items() + + ## + # Creates a tree iterator. The iterator loops over this element + # and all subelements, in document order, and returns all elements + # with a matching tag. + #

+ # If the tree structure is modified during iteration, the result + # is undefined. + # + # @param tag What tags to look for (default is to return all elements). + # @return A list or iterator containing all the matching elements. + # @defreturn list or iterator + + def getiterator(self, tag=None): + nodes = [] + if tag == "*": + tag = None + if tag is None or self.tag == tag: + nodes.append(self) + for node in self._children: + nodes.extend(node.getiterator(tag)) + return nodes + +# compatibility +_Element = _ElementInterface + +## +# Element factory. This function returns an object implementing the +# standard Element interface. The exact class or type of that object +# is implementation dependent, but it will always be compatible with +# the {@link #_ElementInterface} class in this module. +#

+# The element name, attribute names, and attribute values can be +# either 8-bit ASCII strings or Unicode strings. +# +# @param tag The element name. +# @param attrib An optional dictionary, containing element attributes. +# @param **extra Additional attributes, given as keyword arguments. +# @return An element instance. +# @defreturn Element + +def Element(tag, attrib={}, **extra): + attrib = attrib.copy() + attrib.update(extra) + return _ElementInterface(tag, attrib) + +## +# Subelement factory. This function creates an element instance, and +# appends it to an existing element. +#

+# The element name, attribute names, and attribute values can be +# either 8-bit ASCII strings or Unicode strings. +# +# @param parent The parent element. +# @param tag The subelement name. +# @param attrib An optional dictionary, containing element attributes. +# @param **extra Additional attributes, given as keyword arguments. +# @return An element instance. +# @defreturn Element + +def SubElement(parent, tag, attrib={}, **extra): + attrib = attrib.copy() + attrib.update(extra) + element = parent.makeelement(tag, attrib) + parent.append(element) + return element + +## +# Comment element factory. This factory function creates a special +# element that will be serialized as an XML comment. +#

+# The comment string can be either an 8-bit ASCII string or a Unicode +# string. +# +# @param text A string containing the comment string. +# @return An element instance, representing a comment. +# @defreturn Element + +def Comment(text=None): + element = Element(Comment) + element.text = text + return element + +## +# PI element factory. This factory function creates a special element +# that will be serialized as an XML processing instruction. +# +# @param target A string containing the PI target. +# @param text A string containing the PI contents, if any. +# @return An element instance, representing a PI. +# @defreturn Element + +def ProcessingInstruction(target, text=None): + element = Element(ProcessingInstruction) + element.text = target + if text: + element.text = element.text + " " + text + return element + +PI = ProcessingInstruction + +## +# QName wrapper. This can be used to wrap a QName attribute value, in +# order to get proper namespace handling on output. +# +# @param text A string containing the QName value, in the form {uri}local, +# or, if the tag argument is given, the URI part of a QName. +# @param tag Optional tag. If given, the first argument is interpreted as +# an URI, and this argument is interpreted as a local name. +# @return An opaque object, representing the QName. + +class QName: + def __init__(self, text_or_uri, tag=None): + if tag: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + self.text = text_or_uri + def __str__(self): + return self.text + def __hash__(self): + return hash(self.text) + def __cmp__(self, other): + if isinstance(other, QName): + return cmp(self.text, other.text) + return cmp(self.text, other) + +## +# ElementTree wrapper class. This class represents an entire element +# hierarchy, and adds some extra support for serialization to and from +# standard XML. +# +# @param element Optional root element. +# @keyparam file Optional file handle or name. If given, the +# tree is initialized with the contents of this XML file. + +class ElementTree: + + def __init__(self, element=None, file=None): + assert element is None or iselement(element) + self._root = element # first node + if file: + self.parse(file) + + ## + # Gets the root element for this tree. + # + # @return An element instance. + # @defreturn Element + + def getroot(self): + return self._root + + ## + # Replaces the root element for this tree. This discards the + # current contents of the tree, and replaces it with the given + # element. Use with care. + # + # @param element An element instance. + + def _setroot(self, element): + assert iselement(element) + self._root = element + + ## + # Loads an external XML document into this element tree. + # + # @param source A file name or file object. + # @param parser An optional parser instance. If not given, the + # standard {@link XMLTreeBuilder} parser is used. + # @return The document root element. + # @defreturn Element + + def parse(self, source, parser=None): + if not hasattr(source, "read"): + source = open(source, "rb") + if not parser: + parser = XMLTreeBuilder() + while 1: + data = source.read(32768) + if not data: + break + parser.feed(data) + self._root = parser.close() + return self._root + + ## + # Creates a tree iterator for the root element. The iterator loops + # over all elements in this tree, in document order. + # + # @param tag What tags to look for (default is to return all elements) + # @return An iterator. + # @defreturn iterator + + def getiterator(self, tag=None): + assert self._root is not None + return self._root.getiterator(tag) + + ## + # Finds the first toplevel element with given tag. + # Same as getroot().find(path). + # + # @param path What element to look for. + # @return The first matching element, or None if no element was found. + # @defreturn Element or None + + def find(self, path): + assert self._root is not None + if path[:1] == "/": + path = "." + path + return self._root.find(path) + + ## + # Finds the element text for the first toplevel element with given + # tag. Same as getroot().findtext(path). + # + # @param path What toplevel element to look for. + # @param default What to return if the element was not found. + # @return The text content of the first matching element, or the + # default value no element was found. Note that if the element + # has is found, but has no text content, this method returns an + # empty string. + # @defreturn string + + def findtext(self, path, default=None): + assert self._root is not None + if path[:1] == "/": + path = "." + path + return self._root.findtext(path, default) + + ## + # Finds all toplevel elements with the given tag. + # Same as getroot().findall(path). + # + # @param path What element to look for. + # @return A list or iterator containing all matching elements, + # in document order. + # @defreturn list of Element instances + + def findall(self, path): + assert self._root is not None + if path[:1] == "/": + path = "." + path + return self._root.findall(path) + + ## + # Writes the element tree to a file, as XML. + # + # @param file A file name, or a file object opened for writing. + # @param encoding Optional output encoding (default is US-ASCII). + + def write(self, file, encoding="us-ascii"): + assert self._root is not None + if not hasattr(file, "write"): + file = open(file, "wb") + if not encoding: + encoding = "us-ascii" + elif encoding != "utf-8" and encoding != "us-ascii": + file.write("\n" % encoding) + self._write(file, self._root, encoding, {}) + + def _write(self, file, node, encoding, namespaces): + # write XML to file + tag = node.tag + if tag is Comment: + file.write("" % _escape_cdata(node.text, encoding)) + elif tag is ProcessingInstruction: + file.write("" % _escape_cdata(node.text, encoding)) + else: + items = node.items() + xmlns_items = [] # new namespaces in this scope + try: + if isinstance(tag, QName) or tag[:1] == "{": + tag, xmlns = fixtag(tag, namespaces) + if xmlns: xmlns_items.append(xmlns) + except TypeError: + _raise_serialization_error(tag) + file.write("<" + _encode(tag, encoding)) + if items or xmlns_items: + items.sort() # lexical order + for k, v in items: + try: + if isinstance(k, QName) or k[:1] == "{": + k, xmlns = fixtag(k, namespaces) + if xmlns: xmlns_items.append(xmlns) + except TypeError: + _raise_serialization_error(k) + try: + if isinstance(v, QName): + v, xmlns = fixtag(v, namespaces) + if xmlns: xmlns_items.append(xmlns) + except TypeError: + _raise_serialization_error(v) + file.write(" %s=\"%s\"" % (_encode(k, encoding), + _escape_attrib(v, encoding))) + for k, v in xmlns_items: + file.write(" %s=\"%s\"" % (_encode(k, encoding), + _escape_attrib(v, encoding))) + if node.text or len(node): + file.write(">") + if node.text: + file.write(_escape_cdata(node.text, encoding)) + for n in node: + self._write(file, n, encoding, namespaces) + file.write("") + else: + file.write(" />") + for k, v in xmlns_items: + del namespaces[v] + if node.tail: + file.write(_escape_cdata(node.tail, encoding)) + +# -------------------------------------------------------------------- +# helpers + +## +# Checks if an object appears to be a valid element object. +# +# @param An element instance. +# @return A true value if this is an element object. +# @defreturn flag + +def iselement(element): + # FIXME: not sure about this; might be a better idea to look + # for tag/attrib/text attributes + return isinstance(element, _ElementInterface) or hasattr(element, "tag") + +## +# Writes an element tree or element structure to sys.stdout. This +# function should be used for debugging only. +#

+# The exact output format is implementation dependent. In this +# version, it's written as an ordinary XML file. +# +# @param elem An element tree or an individual element. + +def dump(elem): + # debugging + if not isinstance(elem, ElementTree): + elem = ElementTree(elem) + elem.write(sys.stdout) + tail = elem.getroot().tail + if not tail or tail[-1] != "\n": + sys.stdout.write("\n") + +def _encode(s, encoding): + try: + return s.encode(encoding) + except AttributeError: + return s # 1.5.2: assume the string uses the right encoding + +if sys.version[:3] == "1.5": + _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2 +else: + _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"')) + +_escape_map = { + "&": "&", + "<": "<", + ">": ">", + '"': """, +} + +_namespace_map = { + # "well-known" namespace prefixes + "http://www.w3.org/XML/1998/namespace": "xml", + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", +} + +def _raise_serialization_error(text): + raise TypeError( + "cannot serialize %r (type %s)" % (text, type(text).__name__) + ) + +def _encode_entity(text, pattern=_escape): + # map reserved and non-ascii characters to numerical entities + def escape_entities(m, map=_escape_map): + out = [] + append = out.append + for char in m.group(): + text = map.get(char) + if text is None: + text = "&#%d;" % ord(char) + append(text) + return string.join(out, "") + try: + return _encode(pattern.sub(escape_entities, text), "ascii") + except TypeError: + _raise_serialization_error(text) + +# +# the following functions assume an ascii-compatible encoding +# (or "utf-16") + +def _escape_cdata(text, encoding=None, replace=string.replace): + # escape character data + try: + if encoding: + try: + text = _encode(text, encoding) + except UnicodeError: + return _encode_entity(text) + text = replace(text, "&", "&") + text = replace(text, "<", "<") + text = replace(text, ">", ">") + return text + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib(text, encoding=None, replace=string.replace): + # escape attribute value + try: + if encoding: + try: + text = _encode(text, encoding) + except UnicodeError: + return _encode_entity(text) + text = replace(text, "&", "&") + text = replace(text, "'", "'") # FIXME: overkill + text = replace(text, "\"", """) + text = replace(text, "<", "<") + text = replace(text, ">", ">") + return text + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def fixtag(tag, namespaces): + # given a decorated tag (of the form {uri}tag), return prefixed + # tag and namespace declaration, if any + if isinstance(tag, QName): + tag = tag.text + namespace_uri, tag = string.split(tag[1:], "}", 1) + prefix = namespaces.get(namespace_uri) + if prefix is None: + prefix = _namespace_map.get(namespace_uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + namespaces[namespace_uri] = prefix + if prefix == "xml": + xmlns = None + else: + xmlns = ("xmlns:%s" % prefix, namespace_uri) + else: + xmlns = None + return "%s:%s" % (prefix, tag), xmlns + +## +# Parses an XML document into an element tree. +# +# @param source A filename or file object containing XML data. +# @param parser An optional parser instance. If not given, the +# standard {@link XMLTreeBuilder} parser is used. +# @return An ElementTree instance + +def parse(source, parser=None): + tree = ElementTree() + tree.parse(source, parser) + return tree + +## +# Parses an XML document into an element tree incrementally, and reports +# what's going on to the user. +# +# @param source A filename or file object containing XML data. +# @param events A list of events to report back. If omitted, only "end" +# events are reported. +# @return A (event, elem) iterator. + +class iterparse: + + def __init__(self, source, events=None): + if not hasattr(source, "read"): + source = open(source, "rb") + self._file = source + self._events = [] + self._index = 0 + self.root = self._root = None + self._parser = XMLTreeBuilder() + # wire up the parser for event reporting + parser = self._parser._parser + append = self._events.append + if events is None: + events = ["end"] + for event in events: + if event == "start": + try: + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + def handler(tag, attrib_in, event=event, append=append, + start=self._parser._start_list): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + except AttributeError: + def handler(tag, attrib_in, event=event, append=append, + start=self._parser._start): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + elif event == "end": + def handler(tag, event=event, append=append, + end=self._parser._end): + append((event, end(tag))) + parser.EndElementHandler = handler + elif event == "start-ns": + def handler(prefix, uri, event=event, append=append): + try: + uri = _encode(uri, "ascii") + except UnicodeError: + pass + append((event, (prefix or "", uri))) + parser.StartNamespaceDeclHandler = handler + elif event == "end-ns": + def handler(prefix, event=event, append=append): + append((event, None)) + parser.EndNamespaceDeclHandler = handler + + def next(self): + while 1: + try: + item = self._events[self._index] + except IndexError: + if self._parser is None: + self.root = self._root + try: + raise StopIteration + except NameError: + raise IndexError + # load event buffer + del self._events[:] + self._index = 0 + data = self._file.read(16384) + if data: + self._parser.feed(data) + else: + self._root = self._parser.close() + self._parser = None + else: + self._index = self._index + 1 + return item + + try: + iter + def __iter__(self): + return self + except NameError: + def __getitem__(self, index): + return self.next() + +## +# Parses an XML document from a string constant. This function can +# be used to embed "XML literals" in Python code. +# +# @param source A string containing XML data. +# @return An Element instance. +# @defreturn Element + +def XML(text): + parser = XMLTreeBuilder() + parser.feed(text) + return parser.close() + +## +# Parses an XML document from a string constant, and also returns +# a dictionary which maps from element id:s to elements. +# +# @param source A string containing XML data. +# @return A tuple containing an Element instance and a dictionary. +# @defreturn (Element, dictionary) + +def XMLID(text): + parser = XMLTreeBuilder() + parser.feed(text) + tree = parser.close() + ids = {} + for elem in tree.getiterator(): + id = elem.get("id") + if id: + ids[id] = elem + return tree, ids + +## +# Parses an XML document from a string constant. Same as {@link #XML}. +# +# @def fromstring(text) +# @param source A string containing XML data. +# @return An Element instance. +# @defreturn Element + +fromstring = XML + +## +# Generates a string representation of an XML element, including all +# subelements. +# +# @param element An Element instance. +# @return An encoded string containing the XML data. +# @defreturn string + +def tostring(element, encoding=None): + class dummy: + pass + data = [] + file = dummy() + file.write = data.append + ElementTree(element).write(file, encoding) + return string.join(data, "") + +## +# Generic element structure builder. This builder converts a sequence +# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link +# #TreeBuilder.end} method calls to a well-formed element structure. +#

+# You can use this class to build an element structure using a custom XML +# parser, or a parser for some other XML-like format. +# +# @param element_factory Optional element factory. This factory +# is called to create new Element instances, as necessary. + +class TreeBuilder: + + def __init__(self, element_factory=None): + self._data = [] # data collector + self._elem = [] # element stack + self._last = None # last element + self._tail = None # true if we're after an end tag + if element_factory is None: + element_factory = _ElementInterface + self._factory = element_factory + + ## + # Flushes the parser buffers, and returns the toplevel documen + # element. + # + # @return An Element instance. + # @defreturn Element + + def close(self): + assert len(self._elem) == 0, "missing end tags" + assert self._last != None, "missing toplevel element" + return self._last + + def _flush(self): + if self._data: + if self._last is not None: + text = string.join(self._data, "") + if self._tail: + assert self._last.tail is None, "internal error (tail)" + self._last.tail = text + else: + assert self._last.text is None, "internal error (text)" + self._last.text = text + self._data = [] + + ## + # Adds text to the current element. + # + # @param data A string. This should be either an 8-bit string + # containing ASCII text, or a Unicode string. + + def data(self, data): + self._data.append(data) + + ## + # Opens a new element. + # + # @param tag The element name. + # @param attrib A dictionary containing element attributes. + # @return The opened element. + # @defreturn Element + + def start(self, tag, attrs): + self._flush() + self._last = elem = self._factory(tag, attrs) + if self._elem: + self._elem[-1].append(elem) + self._elem.append(elem) + self._tail = 0 + return elem + + ## + # Closes the current element. + # + # @param tag The element name. + # @return The closed element. + # @defreturn Element + + def end(self, tag): + self._flush() + self._last = self._elem.pop() + assert self._last.tag == tag,\ + "end tag mismatch (expected %s, got %s)" % ( + self._last.tag, tag) + self._tail = 1 + return self._last + +## +# Element structure builder for XML source data, based on the +# expat parser. +# +# @keyparam target Target object. If omitted, the builder uses an +# instance of the standard {@link #TreeBuilder} class. +# @keyparam html Predefine HTML entities. This flag is not supported +# by the current implementation. +# @see #ElementTree +# @see #TreeBuilder + +class XMLTreeBuilder: + + def __init__(self, html=0, target=None): + try: + from xmlcore.parsers import expat + except ImportError: + raise ImportError( + "No module named expat; use SimpleXMLTreeBuilder instead" + ) + self._parser = parser = expat.ParserCreate(None, "}") + if target is None: + target = TreeBuilder() + self._target = target + self._names = {} # name memo cache + # callbacks + parser.DefaultHandlerExpand = self._default + parser.StartElementHandler = self._start + parser.EndElementHandler = self._end + parser.CharacterDataHandler = self._data + # let expat do the buffering, if supported + try: + self._parser.buffer_text = 1 + except AttributeError: + pass + # use new-style attribute handling, if supported + try: + self._parser.ordered_attributes = 1 + self._parser.specified_attributes = 1 + parser.StartElementHandler = self._start_list + except AttributeError: + pass + encoding = None + if not parser.returns_unicode: + encoding = "utf-8" + # target.xml(encoding, None) + self._doctype = None + self.entity = {} + + def _fixtext(self, text): + # convert text string to ascii, if possible + try: + return _encode(text, "ascii") + except UnicodeError: + return text + + def _fixname(self, key): + # expand qname, and convert name string to ascii, if possible + try: + name = self._names[key] + except KeyError: + name = key + if "}" in name: + name = "{" + name + self._names[key] = name = self._fixtext(name) + return name + + def _start(self, tag, attrib_in): + fixname = self._fixname + tag = fixname(tag) + attrib = {} + for key, value in attrib_in.items(): + attrib[fixname(key)] = self._fixtext(value) + return self._target.start(tag, attrib) + + def _start_list(self, tag, attrib_in): + fixname = self._fixname + tag = fixname(tag) + attrib = {} + if attrib_in: + for i in range(0, len(attrib_in), 2): + attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1]) + return self._target.start(tag, attrib) + + def _data(self, text): + return self._target.data(self._fixtext(text)) + + def _end(self, tag): + return self._target.end(self._fixname(tag)) + + def _default(self, text): + prefix = text[:1] + if prefix == "&": + # deal with undefined entities + try: + self._target.data(self.entity[text[1:-1]]) + except KeyError: + from xmlcore.parsers import expat + raise expat.error( + "undefined entity %s: line %d, column %d" % + (text, self._parser.ErrorLineNumber, + self._parser.ErrorColumnNumber) + ) + elif prefix == "<" and text[:9] == "": + self._doctype = None + return + text = string.strip(text) + if not text: + return + self._doctype.append(text) + n = len(self._doctype) + if n > 2: + type = self._doctype[1] + if type == "PUBLIC" and n == 4: + name, type, pubid, system = self._doctype + elif type == "SYSTEM" and n == 3: + name, type, system = self._doctype + pubid = None + else: + return + if pubid: + pubid = pubid[1:-1] + self.doctype(name, pubid, system[1:-1]) + self._doctype = None + + ## + # Handles a doctype declaration. + # + # @param name Doctype name. + # @param pubid Public identifier. + # @param system System identifier. + + def doctype(self, name, pubid, system): + pass + + ## + # Feeds data to the parser. + # + # @param data Encoded data. + + def feed(self, data): + self._parser.Parse(data, 0) + + ## + # Finishes feeding data to the parser. + # + # @return An element structure. + # @defreturn Element + + def close(self): + self._parser.Parse("", 1) # end of data + tree = self._target.close() + del self._target, self._parser # get rid of circular references + return tree diff --git a/Lib/xmlcore/etree/__init__.py b/Lib/xmlcore/etree/__init__.py new file mode 100644 index 0000000..ef8d14c --- /dev/null +++ b/Lib/xmlcore/etree/__init__.py @@ -0,0 +1,30 @@ +# $Id: __init__.py 1821 2004-06-03 16:57:49Z fredrik $ +# elementtree package + +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- diff --git a/Lib/xmlcore/parsers/__init__.py b/Lib/xmlcore/parsers/__init__.py new file mode 100644 index 0000000..eb314a3 --- /dev/null +++ b/Lib/xmlcore/parsers/__init__.py @@ -0,0 +1,8 @@ +"""Python interfaces to XML parsers. + +This package contains one module: + +expat -- Python wrapper for James Clark's Expat parser, with namespace + support. + +""" diff --git a/Lib/xmlcore/parsers/expat.py b/Lib/xmlcore/parsers/expat.py new file mode 100644 index 0000000..11359a0 --- /dev/null +++ b/Lib/xmlcore/parsers/expat.py @@ -0,0 +1,4 @@ +"""Interface to the Expat non-validating XML parser.""" +__version__ = '$Revision$' + +from pyexpat import * diff --git a/Lib/xmlcore/sax/__init__.py b/Lib/xmlcore/sax/__init__.py new file mode 100644 index 0000000..f1e467c --- /dev/null +++ b/Lib/xmlcore/sax/__init__.py @@ -0,0 +1,108 @@ +"""Simple API for XML (SAX) implementation for Python. + +This module provides an implementation of the SAX 2 interface; +information about the Java version of the interface can be found at +http://www.megginson.com/SAX/. The Python version of the interface is +documented at <...>. + +This package contains the following modules: + +handler -- Base classes and constants which define the SAX 2 API for + the 'client-side' of SAX for Python. + +saxutils -- Implementation of the convenience classes commonly used to + work with SAX. + +xmlreader -- Base classes and constants which define the SAX 2 API for + the parsers used with SAX for Python. + +expatreader -- Driver that allows use of the Expat parser with SAX. +""" + +from xmlreader import InputSource +from handler import ContentHandler, ErrorHandler +from _exceptions import SAXException, SAXNotRecognizedException, \ + SAXParseException, SAXNotSupportedException, \ + SAXReaderNotAvailable + + +def parse(source, handler, errorHandler=ErrorHandler()): + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + parser.parse(source) + +def parseString(string, handler, errorHandler=ErrorHandler()): + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + + if errorHandler is None: + errorHandler = ErrorHandler() + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + + inpsrc = InputSource() + inpsrc.setByteStream(StringIO(string)) + parser.parse(inpsrc) + +# this is the parser list used by the make_parser function if no +# alternatives are given as parameters to the function + +default_parser_list = ["xmlcore.sax.expatreader"] + +# tell modulefinder that importing sax potentially imports expatreader +_false = 0 +if _false: + import xmlcore.sax.expatreader + +import os, sys +if os.environ.has_key("PY_SAX_PARSER"): + default_parser_list = os.environ["PY_SAX_PARSER"].split(",") +del os + +_key = "python.xml.sax.parser" +if sys.platform[:4] == "java" and sys.registry.containsKey(_key): + default_parser_list = sys.registry.getProperty(_key).split(",") + + +def make_parser(parser_list = []): + """Creates and returns a SAX parser. + + Creates the first parser it is able to instantiate of the ones + given in the list created by doing parser_list + + default_parser_list. The lists must contain the names of Python + modules containing both a SAX parser and a create_parser function.""" + + for parser_name in parser_list + default_parser_list: + try: + return _create_parser(parser_name) + except ImportError,e: + import sys + if sys.modules.has_key(parser_name): + # The parser module was found, but importing it + # failed unexpectedly, pass this exception through + raise + except SAXReaderNotAvailable: + # The parser module detected that it won't work properly, + # so try the next one + pass + + raise SAXReaderNotAvailable("No parsers found", None) + +# --- Internal utility methods used by make_parser + +if sys.platform[ : 4] == "java": + def _create_parser(parser_name): + from org.python.core import imp + drv_module = imp.importName(parser_name, 0, globals()) + return drv_module.create_parser() + +else: + def _create_parser(parser_name): + drv_module = __import__(parser_name,{},{},['create_parser']) + return drv_module.create_parser() + +del sys diff --git a/Lib/xmlcore/sax/_exceptions.py b/Lib/xmlcore/sax/_exceptions.py new file mode 100644 index 0000000..fdd614a --- /dev/null +++ b/Lib/xmlcore/sax/_exceptions.py @@ -0,0 +1,131 @@ +"""Different kinds of SAX Exceptions""" +import sys +if sys.platform[:4] == "java": + from java.lang import Exception +del sys + +# ===== SAXEXCEPTION ===== + +class SAXException(Exception): + """Encapsulate an XML error or warning. This class can contain + basic error or warning information from either the XML parser or + the application: you can subclass it to provide additional + functionality, or to add localization. Note that although you will + receive a SAXException as the argument to the handlers in the + ErrorHandler interface, you are not actually required to throw + the exception; instead, you can simply read the information in + it.""" + + def __init__(self, msg, exception=None): + """Creates an exception. The message is required, but the exception + is optional.""" + self._msg = msg + self._exception = exception + Exception.__init__(self, msg) + + def getMessage(self): + "Return a message for this exception." + return self._msg + + def getException(self): + "Return the embedded exception, or None if there was none." + return self._exception + + def __str__(self): + "Create a string representation of the exception." + return self._msg + + def __getitem__(self, ix): + """Avoids weird error messages if someone does exception[ix] by + mistake, since Exception has __getitem__ defined.""" + raise AttributeError("__getitem__") + + +# ===== SAXPARSEEXCEPTION ===== + +class SAXParseException(SAXException): + """Encapsulate an XML parse error or warning. + + This exception will include information for locating the error in + the original XML document. Note that although the application will + receive a SAXParseException as the argument to the handlers in the + ErrorHandler interface, the application is not actually required + to throw the exception; instead, it can simply read the + information in it and take a different action. + + Since this exception is a subclass of SAXException, it inherits + the ability to wrap another exception.""" + + def __init__(self, msg, exception, locator): + "Creates the exception. The exception parameter is allowed to be None." + SAXException.__init__(self, msg, exception) + self._locator = locator + + # We need to cache this stuff at construction time. + # If this exception is thrown, the objects through which we must + # traverse to get this information may be deleted by the time + # it gets caught. + self._systemId = self._locator.getSystemId() + self._colnum = self._locator.getColumnNumber() + self._linenum = self._locator.getLineNumber() + + def getColumnNumber(self): + """The column number of the end of the text where the exception + occurred.""" + return self._colnum + + def getLineNumber(self): + "The line number of the end of the text where the exception occurred." + return self._linenum + + def getPublicId(self): + "Get the public identifier of the entity where the exception occurred." + return self._locator.getPublicId() + + def getSystemId(self): + "Get the system identifier of the entity where the exception occurred." + return self._systemId + + def __str__(self): + "Create a string representation of the exception." + sysid = self.getSystemId() + if sysid is None: + sysid = "" + linenum = self.getLineNumber() + if linenum is None: + linenum = "?" + colnum = self.getColumnNumber() + if colnum is None: + colnum = "?" + return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) + + +# ===== SAXNOTRECOGNIZEDEXCEPTION ===== + +class SAXNotRecognizedException(SAXException): + """Exception class for an unrecognized identifier. + + An XMLReader will raise this exception when it is confronted with an + unrecognized feature or property. SAX applications and extensions may + use this class for similar purposes.""" + + +# ===== SAXNOTSUPPORTEDEXCEPTION ===== + +class SAXNotSupportedException(SAXException): + """Exception class for an unsupported operation. + + An XMLReader will raise this exception when a service it cannot + perform is requested (specifically setting a state or value). SAX + applications and extensions may use this class for similar + purposes.""" + +# ===== SAXNOTSUPPORTEDEXCEPTION ===== + +class SAXReaderNotAvailable(SAXNotSupportedException): + """Exception class for a missing driver. + + An XMLReader module (driver) should raise this exception when it + is first imported, e.g. when a support module cannot be imported. + It also may be raised during parsing, e.g. if executing an external + program is not permitted.""" diff --git a/Lib/xmlcore/sax/expatreader.py b/Lib/xmlcore/sax/expatreader.py new file mode 100644 index 0000000..6fbd22e --- /dev/null +++ b/Lib/xmlcore/sax/expatreader.py @@ -0,0 +1,414 @@ +""" +SAX driver for the pyexpat C module. This driver works with +pyexpat.__version__ == '2.22'. +""" + +version = "0.20" + +from xmlcore.sax._exceptions import * +from xmlcore.sax.handler import feature_validation, feature_namespaces +from xmlcore.sax.handler import feature_namespace_prefixes +from xmlcore.sax.handler import feature_external_ges, feature_external_pes +from xmlcore.sax.handler import feature_string_interning +from xmlcore.sax.handler import property_xml_string, property_interning_dict + +# xmlcore.parsers.expat does not raise ImportError in Jython +import sys +if sys.platform[:4] == "java": + raise SAXReaderNotAvailable("expat not available in Java", None) +del sys + +try: + from xmlcore.parsers import expat +except ImportError: + raise SAXReaderNotAvailable("expat not supported", None) +else: + if not hasattr(expat, "ParserCreate"): + raise SAXReaderNotAvailable("expat not supported", None) +from xmlcore.sax import xmlreader, saxutils, handler + +AttributesImpl = xmlreader.AttributesImpl +AttributesNSImpl = xmlreader.AttributesNSImpl + +# If we're using a sufficiently recent version of Python, we can use +# weak references to avoid cycles between the parser and content +# handler, otherwise we'll just have to pretend. +try: + import _weakref +except ImportError: + def _mkproxy(o): + return o +else: + import weakref + _mkproxy = weakref.proxy + del weakref, _weakref + +# --- ExpatLocator + +class ExpatLocator(xmlreader.Locator): + """Locator for use with the ExpatParser class. + + This uses a weak reference to the parser object to avoid creating + a circular reference between the parser and the content handler. + """ + def __init__(self, parser): + self._ref = _mkproxy(parser) + + def getColumnNumber(self): + parser = self._ref + if parser._parser is None: + return None + return parser._parser.ErrorColumnNumber + + def getLineNumber(self): + parser = self._ref + if parser._parser is None: + return 1 + return parser._parser.ErrorLineNumber + + def getPublicId(self): + parser = self._ref + if parser is None: + return None + return parser._source.getPublicId() + + def getSystemId(self): + parser = self._ref + if parser is None: + return None + return parser._source.getSystemId() + + +# --- ExpatParser + +class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): + """SAX driver for the pyexpat C module.""" + + def __init__(self, namespaceHandling=0, bufsize=2**16-20): + xmlreader.IncrementalParser.__init__(self, bufsize) + self._source = xmlreader.InputSource() + self._parser = None + self._namespaces = namespaceHandling + self._lex_handler_prop = None + self._parsing = 0 + self._entity_stack = [] + self._external_ges = 1 + self._interning = None + + # XMLReader methods + + def parse(self, source): + "Parse an XML document from a URL or an InputSource." + source = saxutils.prepare_input_source(source) + + self._source = source + self.reset() + self._cont_handler.setDocumentLocator(ExpatLocator(self)) + xmlreader.IncrementalParser.parse(self, source) + + def prepareParser(self, source): + if source.getSystemId() != None: + self._parser.SetBase(source.getSystemId()) + + # Redefined setContentHandler to allow changing handlers during parsing + + def setContentHandler(self, handler): + xmlreader.IncrementalParser.setContentHandler(self, handler) + if self._parsing: + self._reset_cont_handler() + + def getFeature(self, name): + if name == feature_namespaces: + return self._namespaces + elif name == feature_string_interning: + return self._interning is not None + elif name in (feature_validation, feature_external_pes, + feature_namespace_prefixes): + return 0 + elif name == feature_external_ges: + return self._external_ges + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + if self._parsing: + raise SAXNotSupportedException("Cannot set features while parsing") + + if name == feature_namespaces: + self._namespaces = state + elif name == feature_external_ges: + self._external_ges = state + elif name == feature_string_interning: + if state: + if self._interning is None: + self._interning = {} + else: + self._interning = None + elif name == feature_validation: + if state: + raise SAXNotSupportedException( + "expat does not support validation") + elif name == feature_external_pes: + if state: + raise SAXNotSupportedException( + "expat does not read external parameter entities") + elif name == feature_namespace_prefixes: + if state: + raise SAXNotSupportedException( + "expat does not report namespace prefixes") + else: + raise SAXNotRecognizedException( + "Feature '%s' not recognized" % name) + + def getProperty(self, name): + if name == handler.property_lexical_handler: + return self._lex_handler_prop + elif name == property_interning_dict: + return self._interning + elif name == property_xml_string: + if self._parser: + if hasattr(self._parser, "GetInputContext"): + return self._parser.GetInputContext() + else: + raise SAXNotRecognizedException( + "This version of expat does not support getting" + " the XML string") + else: + raise SAXNotSupportedException( + "XML string cannot be returned when not parsing") + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + if name == handler.property_lexical_handler: + self._lex_handler_prop = value + if self._parsing: + self._reset_lex_handler_prop() + elif name == property_interning_dict: + self._interning = value + elif name == property_xml_string: + raise SAXNotSupportedException("Property '%s' cannot be set" % + name) + else: + raise SAXNotRecognizedException("Property '%s' not recognized" % + name) + + # IncrementalParser methods + + def feed(self, data, isFinal = 0): + if not self._parsing: + self.reset() + self._parsing = 1 + self._cont_handler.startDocument() + + try: + # The isFinal parameter is internal to the expat reader. + # If it is set to true, expat will check validity of the entire + # document. When feeding chunks, they are not normally final - + # except when invoked from close. + self._parser.Parse(data, isFinal) + except expat.error, e: + exc = SAXParseException(expat.ErrorString(e.code), e, self) + # FIXME: when to invoke error()? + self._err_handler.fatalError(exc) + + def close(self): + if self._entity_stack: + # If we are completing an external entity, do nothing here + return + self.feed("", isFinal = 1) + self._cont_handler.endDocument() + self._parsing = 0 + # break cycle created by expat handlers pointing to our methods + self._parser = None + + def _reset_cont_handler(self): + self._parser.ProcessingInstructionHandler = \ + self._cont_handler.processingInstruction + self._parser.CharacterDataHandler = self._cont_handler.characters + + def _reset_lex_handler_prop(self): + lex = self._lex_handler_prop + parser = self._parser + if lex is None: + parser.CommentHandler = None + parser.StartCdataSectionHandler = None + parser.EndCdataSectionHandler = None + parser.StartDoctypeDeclHandler = None + parser.EndDoctypeDeclHandler = None + else: + parser.CommentHandler = lex.comment + parser.StartCdataSectionHandler = lex.startCDATA + parser.EndCdataSectionHandler = lex.endCDATA + parser.StartDoctypeDeclHandler = self.start_doctype_decl + parser.EndDoctypeDeclHandler = lex.endDTD + + def reset(self): + if self._namespaces: + self._parser = expat.ParserCreate(self._source.getEncoding(), " ", + intern=self._interning) + self._parser.namespace_prefixes = 1 + self._parser.StartElementHandler = self.start_element_ns + self._parser.EndElementHandler = self.end_element_ns + else: + self._parser = expat.ParserCreate(self._source.getEncoding(), + intern = self._interning) + self._parser.StartElementHandler = self.start_element + self._parser.EndElementHandler = self.end_element + + self._reset_cont_handler() + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + self._parser.NotationDeclHandler = self.notation_decl + self._parser.StartNamespaceDeclHandler = self.start_namespace_decl + self._parser.EndNamespaceDeclHandler = self.end_namespace_decl + + self._decl_handler_prop = None + if self._lex_handler_prop: + self._reset_lex_handler_prop() +# self._parser.DefaultHandler = +# self._parser.DefaultHandlerExpand = +# self._parser.NotStandaloneHandler = + self._parser.ExternalEntityRefHandler = self.external_entity_ref + try: + self._parser.SkippedEntityHandler = self.skipped_entity_handler + except AttributeError: + # This pyexpat does not support SkippedEntity + pass + self._parser.SetParamEntityParsing( + expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + + self._parsing = 0 + self._entity_stack = [] + + # Locator methods + + def getColumnNumber(self): + if self._parser is None: + return None + return self._parser.ErrorColumnNumber + + def getLineNumber(self): + if self._parser is None: + return 1 + return self._parser.ErrorLineNumber + + def getPublicId(self): + return self._source.getPublicId() + + def getSystemId(self): + return self._source.getSystemId() + + # event handlers + def start_element(self, name, attrs): + self._cont_handler.startElement(name, AttributesImpl(attrs)) + + def end_element(self, name): + self._cont_handler.endElement(name) + + def start_element_ns(self, name, attrs): + pair = name.split() + if len(pair) == 1: + # no namespace + pair = (None, name) + elif len(pair) == 3: + pair = pair[0], pair[1] + else: + # default namespace + pair = tuple(pair) + + newattrs = {} + qnames = {} + for (aname, value) in attrs.items(): + parts = aname.split() + length = len(parts) + if length == 1: + # no namespace + qname = aname + apair = (None, aname) + elif length == 3: + qname = "%s:%s" % (parts[2], parts[1]) + apair = parts[0], parts[1] + else: + # default namespace + qname = parts[1] + apair = tuple(parts) + + newattrs[apair] = value + qnames[apair] = qname + + self._cont_handler.startElementNS(pair, None, + AttributesNSImpl(newattrs, qnames)) + + def end_element_ns(self, name): + pair = name.split() + if len(pair) == 1: + pair = (None, name) + elif len(pair) == 3: + pair = pair[0], pair[1] + else: + pair = tuple(pair) + + self._cont_handler.endElementNS(pair, None) + + # this is not used (call directly to ContentHandler) + def processing_instruction(self, target, data): + self._cont_handler.processingInstruction(target, data) + + # this is not used (call directly to ContentHandler) + def character_data(self, data): + self._cont_handler.characters(data) + + def start_namespace_decl(self, prefix, uri): + self._cont_handler.startPrefixMapping(prefix, uri) + + def end_namespace_decl(self, prefix): + self._cont_handler.endPrefixMapping(prefix) + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + self._lex_handler_prop.startDTD(name, pubid, sysid) + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) + + def notation_decl(self, name, base, sysid, pubid): + self._dtd_handler.notationDecl(name, pubid, sysid) + + def external_entity_ref(self, context, base, sysid, pubid): + if not self._external_ges: + return 1 + + source = self._ent_handler.resolveEntity(pubid, sysid) + source = saxutils.prepare_input_source(source, + self._source.getSystemId() or + "") + + self._entity_stack.append((self._parser, self._source)) + self._parser = self._parser.ExternalEntityParserCreate(context) + self._source = source + + try: + xmlreader.IncrementalParser.parse(self, source) + except: + return 0 # FIXME: save error info here? + + (self._parser, self._source) = self._entity_stack[-1] + del self._entity_stack[-1] + return 1 + + def skipped_entity_handler(self, name, is_pe): + if is_pe: + # The SAX spec requires to report skipped PEs with a '%' + name = '%'+name + self._cont_handler.skippedEntity(name) + +# --- + +def create_parser(*args, **kwargs): + return ExpatParser(*args, **kwargs) + +# --- + +if __name__ == "__main__": + import xmlcore.sax + p = create_parser() + p.setContentHandler(xmlcore.sax.XMLGenerator()) + p.setErrorHandler(xmlcore.sax.ErrorHandler()) + p.parse("../../../hamlet.xml") diff --git a/Lib/xmlcore/sax/handler.py b/Lib/xmlcore/sax/handler.py new file mode 100644 index 0000000..f9e91b6 --- /dev/null +++ b/Lib/xmlcore/sax/handler.py @@ -0,0 +1,342 @@ +""" +This module contains the core classes of version 2.0 of SAX for Python. +This file provides only default classes with absolutely minimum +functionality, from which drivers and applications can be subclassed. + +Many of these classes are empty and are included only as documentation +of the interfaces. + +$Id$ +""" + +version = '2.0beta' + +#============================================================================ +# +# HANDLER INTERFACES +# +#============================================================================ + +# ===== ERRORHANDLER ===== + +class ErrorHandler: + """Basic interface for SAX error handlers. + + If you create an object that implements this interface, then + register the object with your XMLReader, the parser will call the + methods in your object to report all warnings and errors. There + are three levels of errors available: warnings, (possibly) + recoverable errors, and unrecoverable errors. All methods take a + SAXParseException as the only parameter.""" + + def error(self, exception): + "Handle a recoverable error." + raise exception + + def fatalError(self, exception): + "Handle a non-recoverable error." + raise exception + + def warning(self, exception): + "Handle a warning." + print exception + + +# ===== CONTENTHANDLER ===== + +class ContentHandler: + """Interface for receiving logical document content events. + + This is the main callback interface in SAX, and the one most + important to applications. The order of events in this interface + mirrors the order of the information in the document.""" + + def __init__(self): + self._locator = None + + def setDocumentLocator(self, locator): + """Called by the parser to give the application a locator for + locating the origin of document events. + + SAX parsers are strongly encouraged (though not absolutely + required) to supply a locator: if it does so, it must supply + the locator to the application by invoking this method before + invoking any of the other methods in the DocumentHandler + interface. + + The locator allows the application to determine the end + position of any document-related event, even if the parser is + not reporting an error. Typically, the application will use + this information for reporting its own errors (such as + character content that does not match an application's + business rules). The information returned by the locator is + probably not sufficient for use with a search engine. + + Note that the locator will return correct information only + during the invocation of the events in this interface. The + application should not attempt to use it at any other time.""" + self._locator = locator + + def startDocument(self): + """Receive notification of the beginning of a document. + + The SAX parser will invoke this method only once, before any + other methods in this interface or in DTDHandler (except for + setDocumentLocator).""" + + def endDocument(self): + """Receive notification of the end of a document. + + The SAX parser will invoke this method only once, and it will + be the last method invoked during the parse. The parser shall + not invoke this method until it has either abandoned parsing + (because of an unrecoverable error) or reached the end of + input.""" + + def startPrefixMapping(self, prefix, uri): + """Begin the scope of a prefix-URI Namespace mapping. + + The information from this event is not necessary for normal + Namespace processing: the SAX XML reader will automatically + replace prefixes for element and attribute names when the + http://xml.org/sax/features/namespaces feature is true (the + default). + + There are cases, however, when applications need to use + prefixes in character data or in attribute values, where they + cannot safely be expanded automatically; the + start/endPrefixMapping event supplies the information to the + application to expand prefixes in those contexts itself, if + necessary. + + Note that start/endPrefixMapping events are not guaranteed to + be properly nested relative to each-other: all + startPrefixMapping events will occur before the corresponding + startElement event, and all endPrefixMapping events will occur + after the corresponding endElement event, but their order is + not guaranteed.""" + + def endPrefixMapping(self, prefix): + """End the scope of a prefix-URI mapping. + + See startPrefixMapping for details. This event will always + occur after the corresponding endElement event, but the order + of endPrefixMapping events is not otherwise guaranteed.""" + + def startElement(self, name, attrs): + """Signals the start of an element in non-namespace mode. + + The name parameter contains the raw XML 1.0 name of the + element type as a string and the attrs parameter holds an + instance of the Attributes class containing the attributes of + the element.""" + + def endElement(self, name): + """Signals the end of an element in non-namespace mode. + + The name parameter contains the name of the element type, just + as with the startElement event.""" + + def startElementNS(self, name, qname, attrs): + """Signals the start of an element in namespace mode. + + The name parameter contains the name of the element type as a + (uri, localname) tuple, the qname parameter the raw XML 1.0 + name used in the source document, and the attrs parameter + holds an instance of the Attributes class containing the + attributes of the element. + + The uri part of the name tuple is None for elements which have + no namespace.""" + + def endElementNS(self, name, qname): + """Signals the end of an element in namespace mode. + + The name parameter contains the name of the element type, just + as with the startElementNS event.""" + + def characters(self, content): + """Receive notification of character data. + + The Parser will call this method to report each chunk of + character data. SAX parsers may return all contiguous + character data in a single chunk, or they may split it into + several chunks; however, all of the characters in any single + event must come from the same external entity so that the + Locator provides useful information.""" + + def ignorableWhitespace(self, whitespace): + """Receive notification of ignorable whitespace in element content. + + Validating Parsers must use this method to report each chunk + of ignorable whitespace (see the W3C XML 1.0 recommendation, + section 2.10): non-validating parsers may also use this method + if they are capable of parsing and using content models. + + SAX parsers may return all contiguous whitespace in a single + chunk, or they may split it into several chunks; however, all + of the characters in any single event must come from the same + external entity, so that the Locator provides useful + information.""" + + def processingInstruction(self, target, data): + """Receive notification of a processing instruction. + + The Parser will invoke this method once for each processing + instruction found: note that processing instructions may occur + before or after the main document element. + + A SAX parser should never report an XML declaration (XML 1.0, + section 2.8) or a text declaration (XML 1.0, section 4.3.1) + using this method.""" + + def skippedEntity(self, name): + """Receive notification of a skipped entity. + + The Parser will invoke this method once for each entity + skipped. Non-validating processors may skip entities if they + have not seen the declarations (because, for example, the + entity was declared in an external DTD subset). All processors + may skip external entities, depending on the values of the + http://xml.org/sax/features/external-general-entities and the + http://xml.org/sax/features/external-parameter-entities + properties.""" + + +# ===== DTDHandler ===== + +class DTDHandler: + """Handle DTD events. + + This interface specifies only those DTD events required for basic + parsing (unparsed entities and attributes).""" + + def notationDecl(self, name, publicId, systemId): + "Handle a notation declaration event." + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + "Handle an unparsed entity declaration event." + + +# ===== ENTITYRESOLVER ===== + +class EntityResolver: + """Basic interface for resolving entities. If you create an object + implementing this interface, then register the object with your + Parser, the parser will call the method in your object to + resolve all external entities. Note that DefaultHandler implements + this interface with the default behaviour.""" + + def resolveEntity(self, publicId, systemId): + """Resolve the system identifier of an entity and return either + the system identifier to read from as a string, or an InputSource + to read from.""" + return systemId + + +#============================================================================ +# +# CORE FEATURES +# +#============================================================================ + +feature_namespaces = "http://xml.org/sax/features/namespaces" +# true: Perform Namespace processing (default). +# false: Optionally do not perform Namespace processing +# (implies namespace-prefixes). +# access: (parsing) read-only; (not parsing) read/write + +feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" +# true: Report the original prefixed names and attributes used for Namespace +# declarations. +# false: Do not report attributes used for Namespace declarations, and +# optionally do not report original prefixed names (default). +# access: (parsing) read-only; (not parsing) read/write + +feature_string_interning = "http://xml.org/sax/features/string-interning" +# true: All element names, prefixes, attribute names, Namespace URIs, and +# local names are interned using the built-in intern function. +# false: Names are not necessarily interned, although they may be (default). +# access: (parsing) read-only; (not parsing) read/write + +feature_validation = "http://xml.org/sax/features/validation" +# true: Report all validation errors (implies external-general-entities and +# external-parameter-entities). +# false: Do not report validation errors. +# access: (parsing) read-only; (not parsing) read/write + +feature_external_ges = "http://xml.org/sax/features/external-general-entities" +# true: Include all external general (text) entities. +# false: Do not include external general entities. +# access: (parsing) read-only; (not parsing) read/write + +feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" +# true: Include all external parameter entities, including the external +# DTD subset. +# false: Do not include any external parameter entities, even the external +# DTD subset. +# access: (parsing) read-only; (not parsing) read/write + +all_features = [feature_namespaces, + feature_namespace_prefixes, + feature_string_interning, + feature_validation, + feature_external_ges, + feature_external_pes] + + +#============================================================================ +# +# CORE PROPERTIES +# +#============================================================================ + +property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" +# data type: xml.sax.sax2lib.LexicalHandler +# description: An optional extension handler for lexical events like comments. +# access: read/write + +property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" +# data type: xml.sax.sax2lib.DeclHandler +# description: An optional extension handler for DTD-related events other +# than notations and unparsed entities. +# access: read/write + +property_dom_node = "http://xml.org/sax/properties/dom-node" +# data type: org.w3c.dom.Node +# description: When parsing, the current DOM node being visited if this is +# a DOM iterator; when not parsing, the root DOM node for +# iteration. +# access: (parsing) read-only; (not parsing) read/write + +property_xml_string = "http://xml.org/sax/properties/xml-string" +# data type: String +# description: The literal string of characters that was the source for +# the current event. +# access: read-only + +property_encoding = "http://www.python.org/sax/properties/encoding" +# data type: String +# description: The name of the encoding to assume for input data. +# access: write: set the encoding, e.g. established by a higher-level +# protocol. May change during parsing (e.g. after +# processing a META tag) +# read: return the current encoding (possibly established through +# auto-detection. +# initial value: UTF-8 +# + +property_interning_dict = "http://www.python.org/sax/properties/interning-dict" +# data type: Dictionary +# description: The dictionary used to intern common strings in the document +# access: write: Request that the parser uses a specific dictionary, to +# allow interning across different documents +# read: return the current interning dictionary, or None +# + +all_properties = [property_lexical_handler, + property_dom_node, + property_declaration_handler, + property_xml_string, + property_encoding, + property_interning_dict] diff --git a/Lib/xmlcore/sax/saxutils.py b/Lib/xmlcore/sax/saxutils.py new file mode 100644 index 0000000..582b008 --- /dev/null +++ b/Lib/xmlcore/sax/saxutils.py @@ -0,0 +1,297 @@ +"""\ +A library of useful helper classes to the SAX classes, for the +convenience of application and driver writers. +""" + +import os, urlparse, urllib, types +import handler +import xmlreader + +try: + _StringTypes = [types.StringType, types.UnicodeType] +except AttributeError: + _StringTypes = [types.StringType] + +# See whether the xmlcharrefreplace error handler is +# supported +try: + from codecs import xmlcharrefreplace_errors + _error_handling = "xmlcharrefreplace" + del xmlcharrefreplace_errors +except ImportError: + _error_handling = "strict" + +def __dict_replace(s, d): + """Replace substrings of a string using a dictionary.""" + for key, value in d.items(): + s = s.replace(key, value) + return s + +def escape(data, entities={}): + """Escape &, <, and > in a string of data. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + + # must do ampersand first + data = data.replace("&", "&") + data = data.replace(">", ">") + data = data.replace("<", "<") + if entities: + data = __dict_replace(data, entities) + return data + +def unescape(data, entities={}): + """Unescape &, <, and > in a string of data. + + You can unescape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = data.replace("<", "<") + data = data.replace(">", ">") + if entities: + data = __dict_replace(data, entities) + # must do ampersand last + return data.replace("&", "&") + +def quoteattr(data, entities={}): + """Escape and quote an attribute value. + + Escape &, <, and > in a string of data, then quote it for use as + an attribute value. The \" character will be escaped as well, if + necessary. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = escape(data, entities) + if '"' in data: + if "'" in data: + data = '"%s"' % data.replace('"', """) + else: + data = "'%s'" % data + else: + data = '"%s"' % data + return data + + +class XMLGenerator(handler.ContentHandler): + + def __init__(self, out=None, encoding="iso-8859-1"): + if out is None: + import sys + out = sys.stdout + handler.ContentHandler.__init__(self) + self._out = out + self._ns_contexts = [{}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + self._undeclared_ns_maps = [] + self._encoding = encoding + + def _write(self, text): + if isinstance(text, str): + self._out.write(text) + else: + self._out.write(text.encode(self._encoding, _error_handling)) + + # ContentHandler methods + + def startDocument(self): + self._write('\n' % + self._encoding) + + def startPrefixMapping(self, prefix, uri): + self._ns_contexts.append(self._current_context.copy()) + self._current_context[uri] = prefix + self._undeclared_ns_maps.append((prefix, uri)) + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts[-1] + del self._ns_contexts[-1] + + def startElement(self, name, attrs): + self._write('<' + name) + for (name, value) in attrs.items(): + self._write(' %s=%s' % (name, quoteattr(value))) + self._write('>') + + def endElement(self, name): + self._write('' % name) + + def startElementNS(self, name, qname, attrs): + if name[0] is None: + # if the name was not namespace-scoped, use the unqualified part + name = name[1] + else: + # else try to restore the original prefix from the namespace + name = self._current_context[name[0]] + ":" + name[1] + self._write('<' + name) + + for pair in self._undeclared_ns_maps: + self._write(' xmlns:%s="%s"' % pair) + self._undeclared_ns_maps = [] + + for (name, value) in attrs.items(): + name = self._current_context[name[0]] + ":" + name[1] + self._write(' %s=%s' % (name, quoteattr(value))) + self._write('>') + + def endElementNS(self, name, qname): + if name[0] is None: + name = name[1] + else: + name = self._current_context[name[0]] + ":" + name[1] + self._write('' % name) + + def characters(self, content): + self._write(escape(content)) + + def ignorableWhitespace(self, content): + self._write(content) + + def processingInstruction(self, target, data): + self._write('' % (target, data)) + + +class XMLFilterBase(xmlreader.XMLReader): + """This class is designed to sit between an XMLReader and the + client application's event handlers. By default, it does nothing + but pass requests up to the reader and events on to the handlers + unmodified, but subclasses can override specific methods to modify + the event stream or the configuration requests as they pass + through.""" + + def __init__(self, parent = None): + xmlreader.XMLReader.__init__(self) + self._parent = parent + + # ErrorHandler methods + + def error(self, exception): + self._err_handler.error(exception) + + def fatalError(self, exception): + self._err_handler.fatalError(exception) + + def warning(self, exception): + self._err_handler.warning(exception) + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self._cont_handler.setDocumentLocator(locator) + + def startDocument(self): + self._cont_handler.startDocument() + + def endDocument(self): + self._cont_handler.endDocument() + + def startPrefixMapping(self, prefix, uri): + self._cont_handler.startPrefixMapping(prefix, uri) + + def endPrefixMapping(self, prefix): + self._cont_handler.endPrefixMapping(prefix) + + def startElement(self, name, attrs): + self._cont_handler.startElement(name, attrs) + + def endElement(self, name): + self._cont_handler.endElement(name) + + def startElementNS(self, name, qname, attrs): + self._cont_handler.startElementNS(name, qname, attrs) + + def endElementNS(self, name, qname): + self._cont_handler.endElementNS(name, qname) + + def characters(self, content): + self._cont_handler.characters(content) + + def ignorableWhitespace(self, chars): + self._cont_handler.ignorableWhitespace(chars) + + def processingInstruction(self, target, data): + self._cont_handler.processingInstruction(target, data) + + def skippedEntity(self, name): + self._cont_handler.skippedEntity(name) + + # DTDHandler methods + + def notationDecl(self, name, publicId, systemId): + self._dtd_handler.notationDecl(name, publicId, systemId) + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) + + # EntityResolver methods + + def resolveEntity(self, publicId, systemId): + return self._ent_handler.resolveEntity(publicId, systemId) + + # XMLReader methods + + def parse(self, source): + self._parent.setContentHandler(self) + self._parent.setErrorHandler(self) + self._parent.setEntityResolver(self) + self._parent.setDTDHandler(self) + self._parent.parse(source) + + def setLocale(self, locale): + self._parent.setLocale(locale) + + def getFeature(self, name): + return self._parent.getFeature(name) + + def setFeature(self, name, state): + self._parent.setFeature(name, state) + + def getProperty(self, name): + return self._parent.getProperty(name) + + def setProperty(self, name, value): + self._parent.setProperty(name, value) + + # XMLFilter methods + + def getParent(self): + return self._parent + + def setParent(self, parent): + self._parent = parent + +# --- Utility functions + +def prepare_input_source(source, base = ""): + """This function takes an InputSource and an optional base URL and + returns a fully resolved InputSource object ready for reading.""" + + if type(source) in _StringTypes: + source = xmlreader.InputSource(source) + elif hasattr(source, "read"): + f = source + source = xmlreader.InputSource() + source.setByteStream(f) + if hasattr(f, "name"): + source.setSystemId(f.name) + + if source.getByteStream() is None: + sysid = source.getSystemId() + basehead = os.path.dirname(os.path.normpath(base)) + sysidfilename = os.path.join(basehead, sysid) + if os.path.isfile(sysidfilename): + source.setSystemId(sysidfilename) + f = open(sysidfilename, "rb") + else: + source.setSystemId(urlparse.urljoin(base, sysid)) + f = urllib.urlopen(source.getSystemId()) + + source.setByteStream(f) + + return source diff --git a/Lib/xmlcore/sax/xmlreader.py b/Lib/xmlcore/sax/xmlreader.py new file mode 100644 index 0000000..9a2361e --- /dev/null +++ b/Lib/xmlcore/sax/xmlreader.py @@ -0,0 +1,381 @@ +"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers +should be based on this code. """ + +import handler + +from _exceptions import SAXNotSupportedException, SAXNotRecognizedException + + +# ===== XMLREADER ===== + +class XMLReader: + """Interface for reading an XML document using callbacks. + + XMLReader is the interface that an XML parser's SAX2 driver must + implement. This interface allows an application to set and query + features and properties in the parser, to register event handlers + for document processing, and to initiate a document parse. + + All SAX interfaces are assumed to be synchronous: the parse + methods must not return until parsing is complete, and readers + must wait for an event-handler callback to return before reporting + the next event.""" + + def __init__(self): + self._cont_handler = handler.ContentHandler() + self._dtd_handler = handler.DTDHandler() + self._ent_handler = handler.EntityResolver() + self._err_handler = handler.ErrorHandler() + + def parse(self, source): + "Parse an XML document from a system identifier or an InputSource." + raise NotImplementedError("This method must be implemented!") + + def getContentHandler(self): + "Returns the current ContentHandler." + return self._cont_handler + + def setContentHandler(self, handler): + "Registers a new object to receive document content events." + self._cont_handler = handler + + def getDTDHandler(self): + "Returns the current DTD handler." + return self._dtd_handler + + def setDTDHandler(self, handler): + "Register an object to receive basic DTD-related events." + self._dtd_handler = handler + + def getEntityResolver(self): + "Returns the current EntityResolver." + return self._ent_handler + + def setEntityResolver(self, resolver): + "Register an object to resolve external entities." + self._ent_handler = resolver + + def getErrorHandler(self): + "Returns the current ErrorHandler." + return self._err_handler + + def setErrorHandler(self, handler): + "Register an object to receive error-message events." + self._err_handler = handler + + def setLocale(self, locale): + """Allow an application to set the locale for errors and warnings. + + SAX parsers are not required to provide localization for errors + and warnings; if they cannot support the requested locale, + however, they must throw a SAX exception. Applications may + request a locale change in the middle of a parse.""" + raise SAXNotSupportedException("Locale support not implemented") + + def getFeature(self, name): + "Looks up and returns the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + "Sets the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def getProperty(self, name): + "Looks up and returns the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + "Sets the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + +class IncrementalParser(XMLReader): + """This interface adds three extra methods to the XMLReader + interface that allow XML parsers to support incremental + parsing. Support for this interface is optional, since not all + underlying XML parsers support this functionality. + + When the parser is instantiated it is ready to begin accepting + data from the feed method immediately. After parsing has been + finished with a call to close the reset method must be called to + make the parser ready to accept new data, either from feed or + using the parse method. + + Note that these methods must _not_ be called during parsing, that + is, after parse has been called and before it returns. + + By default, the class also implements the parse method of the XMLReader + interface using the feed, close and reset methods of the + IncrementalParser interface as a convenience to SAX 2.0 driver + writers.""" + + def __init__(self, bufsize=2**16): + self._bufsize = bufsize + XMLReader.__init__(self) + + def parse(self, source): + import saxutils + source = saxutils.prepare_input_source(source) + + self.prepareParser(source) + file = source.getByteStream() + buffer = file.read(self._bufsize) + while buffer != "": + self.feed(buffer) + buffer = file.read(self._bufsize) + self.close() + + def feed(self, data): + """This method gives the raw XML data in the data parameter to + the parser and makes it parse the data, emitting the + corresponding events. It is allowed for XML constructs to be + split across several calls to feed. + + feed may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def prepareParser(self, source): + """This method is called by the parse implementation to allow + the SAX 2.0 driver to prepare itself for parsing.""" + raise NotImplementedError("prepareParser must be overridden!") + + def close(self): + """This method is called when the entire XML document has been + passed to the parser through the feed method, to notify the + parser that there are no more data. This allows the parser to + do the final checks on the document and empty the internal + data buffer. + + The parser will not be ready to parse another document until + the reset method has been called. + + close may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def reset(self): + """This method is called after close has been called to reset + the parser so that it is ready to parse new documents. The + results of calling parse or feed after close without calling + reset are undefined.""" + raise NotImplementedError("This method must be implemented!") + +# ===== LOCATOR ===== + +class Locator: + """Interface for associating a SAX event with a document + location. A locator object will return valid results only during + calls to DocumentHandler methods; at any other time, the + results are unpredictable.""" + + def getColumnNumber(self): + "Return the column number where the current event ends." + return -1 + + def getLineNumber(self): + "Return the line number where the current event ends." + return -1 + + def getPublicId(self): + "Return the public identifier for the current event." + return None + + def getSystemId(self): + "Return the system identifier for the current event." + return None + +# ===== INPUTSOURCE ===== + +class InputSource: + """Encapsulation of the information needed by the XMLReader to + read entities. + + This class may include information about the public identifier, + system identifier, byte stream (possibly with character encoding + information) and/or the character stream of an entity. + + Applications will create objects of this class for use in the + XMLReader.parse method and for returning from + EntityResolver.resolveEntity. + + An InputSource belongs to the application, the XMLReader is not + allowed to modify InputSource objects passed to it from the + application, although it may make copies and modify those.""" + + def __init__(self, system_id = None): + self.__system_id = system_id + self.__public_id = None + self.__encoding = None + self.__bytefile = None + self.__charfile = None + + def setPublicId(self, public_id): + "Sets the public identifier of this InputSource." + self.__public_id = public_id + + def getPublicId(self): + "Returns the public identifier of this InputSource." + return self.__public_id + + def setSystemId(self, system_id): + "Sets the system identifier of this InputSource." + self.__system_id = system_id + + def getSystemId(self): + "Returns the system identifier of this InputSource." + return self.__system_id + + def setEncoding(self, encoding): + """Sets the character encoding of this InputSource. + + The encoding must be a string acceptable for an XML encoding + declaration (see section 4.3.3 of the XML recommendation). + + The encoding attribute of the InputSource is ignored if the + InputSource also contains a character stream.""" + self.__encoding = encoding + + def getEncoding(self): + "Get the character encoding of this InputSource." + return self.__encoding + + def setByteStream(self, bytefile): + """Set the byte stream (a Python file-like object which does + not perform byte-to-character conversion) for this input + source. + + The SAX parser will ignore this if there is also a character + stream specified, but it will use a byte stream in preference + to opening a URI connection itself. + + If the application knows the character encoding of the byte + stream, it should set it with the setEncoding method.""" + self.__bytefile = bytefile + + def getByteStream(self): + """Get the byte stream for this input source. + + The getEncoding method will return the character encoding for + this byte stream, or None if unknown.""" + return self.__bytefile + + def setCharacterStream(self, charfile): + """Set the character stream for this input source. (The stream + must be a Python 2.0 Unicode-wrapped file-like that performs + conversion to Unicode strings.) + + If there is a character stream specified, the SAX parser will + ignore any byte stream and will not attempt to open a URI + connection to the system identifier.""" + self.__charfile = charfile + + def getCharacterStream(self): + "Get the character stream for this input source." + return self.__charfile + +# ===== ATTRIBUTESIMPL ===== + +class AttributesImpl: + + def __init__(self, attrs): + """Non-NS-aware implementation. + + attrs should be of the form {name : value}.""" + self._attrs = attrs + + def getLength(self): + return len(self._attrs) + + def getType(self, name): + return "CDATA" + + def getValue(self, name): + return self._attrs[name] + + def getValueByQName(self, name): + return self._attrs[name] + + def getNameByQName(self, name): + if not self._attrs.has_key(name): + raise KeyError, name + return name + + def getQNameByName(self, name): + if not self._attrs.has_key(name): + raise KeyError, name + return name + + def getNames(self): + return self._attrs.keys() + + def getQNames(self): + return self._attrs.keys() + + def __len__(self): + return len(self._attrs) + + def __getitem__(self, name): + return self._attrs[name] + + def keys(self): + return self._attrs.keys() + + def has_key(self, name): + return self._attrs.has_key(name) + + def __contains__(self, name): + return self._attrs.has_key(name) + + def get(self, name, alternative=None): + return self._attrs.get(name, alternative) + + def copy(self): + return self.__class__(self._attrs) + + def items(self): + return self._attrs.items() + + def values(self): + return self._attrs.values() + +# ===== ATTRIBUTESNSIMPL ===== + +class AttributesNSImpl(AttributesImpl): + + def __init__(self, attrs, qnames): + """NS-aware implementation. + + attrs should be of the form {(ns_uri, lname): value, ...}. + qnames of the form {(ns_uri, lname): qname, ...}.""" + self._attrs = attrs + self._qnames = qnames + + def getValueByQName(self, name): + for (nsname, qname) in self._qnames.items(): + if qname == name: + return self._attrs[nsname] + + raise KeyError, name + + def getNameByQName(self, name): + for (nsname, qname) in self._qnames.items(): + if qname == name: + return nsname + + raise KeyError, name + + def getQNameByName(self, name): + return self._qnames[name] + + def getQNames(self): + return self._qnames.values() + + def copy(self): + return self.__class__(self._attrs, self._qnames) + + +def _test(): + XMLReader() + IncrementalParser() + Locator() + +if __name__ == "__main__": + _test() -- cgit v0.12