diff options
Diffstat (limited to 'Lib/xml/dom')
-rw-r--r-- | Lib/xml/dom/NodeFilter.py | 27 | ||||
-rw-r--r-- | Lib/xml/dom/__init__.py | 139 | ||||
-rw-r--r-- | Lib/xml/dom/domreg.py | 99 | ||||
-rw-r--r-- | Lib/xml/dom/expatbuilder.py | 983 | ||||
-rw-r--r-- | Lib/xml/dom/minicompat.py | 184 | ||||
-rw-r--r-- | Lib/xml/dom/minidom.py | 1938 | ||||
-rw-r--r-- | Lib/xml/dom/pulldom.py | 351 | ||||
-rw-r--r-- | Lib/xml/dom/xmlbuilder.py | 388 |
8 files changed, 0 insertions, 4109 deletions
diff --git a/Lib/xml/dom/NodeFilter.py b/Lib/xml/dom/NodeFilter.py deleted file mode 100644 index fc05245..0000000 --- a/Lib/xml/dom/NodeFilter.py +++ /dev/null @@ -1,27 +0,0 @@ -# This is the Python mapping for interface NodeFilter from -# DOM2-Traversal-Range. It contains only constants. - -class NodeFilter: - """ - This is the DOM2 NodeFilter interface. It contains only constants. - """ - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - - SHOW_ALL = 0xFFFFFFFFL - SHOW_ELEMENT = 0x00000001 - SHOW_ATTRIBUTE = 0x00000002 - SHOW_TEXT = 0x00000004 - SHOW_CDATA_SECTION = 0x00000008 - SHOW_ENTITY_REFERENCE = 0x00000010 - SHOW_ENTITY = 0x00000020 - SHOW_PROCESSING_INSTRUCTION = 0x00000040 - SHOW_COMMENT = 0x00000080 - SHOW_DOCUMENT = 0x00000100 - SHOW_DOCUMENT_TYPE = 0x00000200 - SHOW_DOCUMENT_FRAGMENT = 0x00000400 - SHOW_NOTATION = 0x00000800 - - def acceptNode(self, node): - raise NotImplementedError diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py deleted file mode 100644 index 6363d00..0000000 --- a/Lib/xml/dom/__init__.py +++ /dev/null @@ -1,139 +0,0 @@ -"""W3C Document Object Model implementation for Python. - -The Python mapping of the Document Object Model is documented in the -Python Library Reference in the section on the xml.dom package. - -This package contains the following modules: - -minidom -- A simple implementation of the Level 1 DOM with namespace - support added (based on the Level 2 specification) and other - minor Level 2 functionality. - -pulldom -- DOM builder supporting on-demand tree-building for selected - subtrees of the document. - -""" - - -class Node: - """Class giving the NodeType constants.""" - - # DOM implementations may use this as a base class for their own - # Node implementations. If they don't, the constants defined here - # should still be used as the canonical definitions as they match - # the values given in the W3C recommendation. Client code can - # safely refer to these values in all tests of Node.nodeType - # values. - - ELEMENT_NODE = 1 - ATTRIBUTE_NODE = 2 - TEXT_NODE = 3 - CDATA_SECTION_NODE = 4 - ENTITY_REFERENCE_NODE = 5 - ENTITY_NODE = 6 - PROCESSING_INSTRUCTION_NODE = 7 - COMMENT_NODE = 8 - DOCUMENT_NODE = 9 - DOCUMENT_TYPE_NODE = 10 - DOCUMENT_FRAGMENT_NODE = 11 - NOTATION_NODE = 12 - - -#ExceptionCode -INDEX_SIZE_ERR = 1 -DOMSTRING_SIZE_ERR = 2 -HIERARCHY_REQUEST_ERR = 3 -WRONG_DOCUMENT_ERR = 4 -INVALID_CHARACTER_ERR = 5 -NO_DATA_ALLOWED_ERR = 6 -NO_MODIFICATION_ALLOWED_ERR = 7 -NOT_FOUND_ERR = 8 -NOT_SUPPORTED_ERR = 9 -INUSE_ATTRIBUTE_ERR = 10 -INVALID_STATE_ERR = 11 -SYNTAX_ERR = 12 -INVALID_MODIFICATION_ERR = 13 -NAMESPACE_ERR = 14 -INVALID_ACCESS_ERR = 15 -VALIDATION_ERR = 16 - - -class DOMException(Exception): - """Abstract base class for DOM exceptions. - Exceptions with specific codes are specializations of this class.""" - - def __init__(self, *args, **kw): - if self.__class__ is DOMException: - raise RuntimeError( - "DOMException should not be instantiated directly") - Exception.__init__(self, *args, **kw) - - def _get_code(self): - return self.code - - -class IndexSizeErr(DOMException): - code = INDEX_SIZE_ERR - -class DomstringSizeErr(DOMException): - code = DOMSTRING_SIZE_ERR - -class HierarchyRequestErr(DOMException): - code = HIERARCHY_REQUEST_ERR - -class WrongDocumentErr(DOMException): - code = WRONG_DOCUMENT_ERR - -class InvalidCharacterErr(DOMException): - code = INVALID_CHARACTER_ERR - -class NoDataAllowedErr(DOMException): - code = NO_DATA_ALLOWED_ERR - -class NoModificationAllowedErr(DOMException): - code = NO_MODIFICATION_ALLOWED_ERR - -class NotFoundErr(DOMException): - code = NOT_FOUND_ERR - -class NotSupportedErr(DOMException): - code = NOT_SUPPORTED_ERR - -class InuseAttributeErr(DOMException): - code = INUSE_ATTRIBUTE_ERR - -class InvalidStateErr(DOMException): - code = INVALID_STATE_ERR - -class SyntaxErr(DOMException): - code = SYNTAX_ERR - -class InvalidModificationErr(DOMException): - code = INVALID_MODIFICATION_ERR - -class NamespaceErr(DOMException): - code = NAMESPACE_ERR - -class InvalidAccessErr(DOMException): - code = INVALID_ACCESS_ERR - -class ValidationErr(DOMException): - code = VALIDATION_ERR - -class UserDataHandler: - """Class giving the operation constants for UserDataHandler.handle().""" - - # Based on DOM Level 3 (WD 9 April 2002) - - NODE_CLONED = 1 - NODE_IMPORTED = 2 - NODE_DELETED = 3 - NODE_RENAMED = 4 - -XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" -XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" -XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" -EMPTY_NAMESPACE = None -EMPTY_PREFIX = None - -from domreg import getDOMImplementation,registerDOMImplementation diff --git a/Lib/xml/dom/domreg.py b/Lib/xml/dom/domreg.py deleted file mode 100644 index 684c436..0000000 --- a/Lib/xml/dom/domreg.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Registration facilities for DOM. This module should not be used -directly. Instead, the functions getDOMImplementation and -registerDOMImplementation should be imported from xml.dom.""" - -from xml.dom.minicompat import * # isinstance, StringTypes - -# This is a list of well-known implementations. Well-known names -# should be published by posting to xml-sig@python.org, and are -# subsequently recorded in this file. - -well_known_implementations = { - 'minidom':'xml.dom.minidom', - '4DOM': 'xml.dom.DOMImplementation', - } - -# DOM implementations not officially registered should register -# themselves with their - -registered = {} - -def registerDOMImplementation(name, factory): - """registerDOMImplementation(name, factory) - - Register the factory function with the name. The factory function - should return an object which implements the DOMImplementation - interface. The factory function can either return the same object, - or a new one (e.g. if that implementation supports some - customization).""" - - registered[name] = factory - -def _good_enough(dom, features): - "_good_enough(dom, features) -> Return 1 if the dom offers the features" - for f,v in features: - if not dom.hasFeature(f,v): - return 0 - return 1 - -def getDOMImplementation(name = None, features = ()): - """getDOMImplementation(name = None, features = ()) -> DOM implementation. - - Return a suitable DOM implementation. The name is either - well-known, the module name of a DOM implementation, or None. If - it is not None, imports the corresponding module and returns - DOMImplementation object if the import succeeds. - - If name is not given, consider the available implementations to - find one with the required feature set. If no implementation can - be found, raise an ImportError. The features list must be a sequence - of (feature, version) pairs which are passed to hasFeature.""" - - import os - creator = None - mod = well_known_implementations.get(name) - if mod: - mod = __import__(mod, {}, {}, ['getDOMImplementation']) - return mod.getDOMImplementation() - elif name: - return registered[name]() - elif os.environ.has_key("PYTHON_DOM"): - return getDOMImplementation(name = os.environ["PYTHON_DOM"]) - - # User did not specify a name, try implementations in arbitrary - # order, returning the one that has the required features - if isinstance(features, StringTypes): - features = _parse_feature_string(features) - for creator in registered.values(): - dom = creator() - if _good_enough(dom, features): - return dom - - for creator in well_known_implementations.keys(): - try: - dom = getDOMImplementation(name = creator) - except StandardError: # typically ImportError, or AttributeError - continue - if _good_enough(dom, features): - return dom - - raise ImportError,"no suitable DOM implementation found" - -def _parse_feature_string(s): - features = [] - parts = s.split() - i = 0 - length = len(parts) - while i < length: - feature = parts[i] - if feature[0] in "0123456789": - raise ValueError, "bad feature name: %r" % (feature,) - i = i + 1 - version = None - if i < length: - v = parts[i] - if v[0] in "0123456789": - i = i + 1 - version = v - features.append((feature, version)) - return tuple(features) diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py deleted file mode 100644 index 47d81fb..0000000 --- a/Lib/xml/dom/expatbuilder.py +++ /dev/null @@ -1,983 +0,0 @@ -"""Facility to use the Expat parser to load a minidom instance -from a string or file. - -This avoids all the overhead of SAX and pulldom to gain performance. -""" - -# Warning! -# -# This module is tightly bound to the implementation details of the -# minidom DOM and can't be used with other DOM implementations. This -# is due, in part, to a lack of appropriate methods in the DOM (there is -# no way to create Entity and Notation nodes via the DOM Level 2 -# interface), and for performance. The later is the cause of some fairly -# cryptic code. -# -# Performance hacks: -# -# - .character_data_handler() has an extra case in which continuing -# data is appended to an existing Text node; this can be a -# speedup since pyexpat can break up character data into multiple -# callbacks even though we set the buffer_text attribute on the -# parser. This also gives us the advantage that we don't need a -# separate normalization pass. -# -# - Determining that a node exists is done using an identity comparison -# with None rather than a truth test; this avoids searching for and -# calling any methods on the node object if it exists. (A rather -# nice speedup is achieved this way as well!) - -from xml.dom import xmlbuilder, minidom, Node -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE -from xml.parsers import expat -from xml.dom.minidom import _append_child, _set_attribute_node -from xml.dom.NodeFilter import NodeFilter - -from xml.dom.minicompat import * - -TEXT_NODE = Node.TEXT_NODE -CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE -DOCUMENT_NODE = Node.DOCUMENT_NODE - -FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT -FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT -FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP -FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT - -theDOMImplementation = minidom.getDOMImplementation() - -# Expat typename -> TypeInfo -_typeinfo_map = { - "CDATA": minidom.TypeInfo(None, "cdata"), - "ENUM": minidom.TypeInfo(None, "enumeration"), - "ENTITY": minidom.TypeInfo(None, "entity"), - "ENTITIES": minidom.TypeInfo(None, "entities"), - "ID": minidom.TypeInfo(None, "id"), - "IDREF": minidom.TypeInfo(None, "idref"), - "IDREFS": minidom.TypeInfo(None, "idrefs"), - "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), - "NMTOKENS": minidom.TypeInfo(None, "nmtokens"), - } - -class ElementInfo(NewStyle): - __slots__ = '_attr_info', '_model', 'tagName' - - def __init__(self, tagName, model=None): - self.tagName = tagName - self._attr_info = [] - self._model = model - - def __getstate__(self): - return self._attr_info, self._model, self.tagName - - def __setstate__(self, state): - self._attr_info, self._model, self.tagName = state - - def getAttributeType(self, aname): - for info in self._attr_info: - if info[1] == aname: - t = info[-2] - if t[0] == "(": - return _typeinfo_map["ENUM"] - else: - return _typeinfo_map[info[-2]] - return minidom._no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return minidom._no_type - - def isElementContent(self): - if self._model: - type = self._model[0] - return type not in (expat.model.XML_CTYPE_ANY, - expat.model.XML_CTYPE_MIXED) - else: - return False - - def isEmpty(self): - if self._model: - return self._model[0] == expat.model.XML_CTYPE_EMPTY - else: - return False - - def isId(self, aname): - for info in self._attr_info: - if info[1] == aname: - return info[-2] == "ID" - return False - - def isIdNS(self, euri, ename, auri, aname): - # not sure this is meaningful - return self.isId((auri, aname)) - -def _intern(builder, s): - return builder._intern_setdefault(s, s) - -def _parse_ns_name(builder, name): - assert ' ' in name - parts = name.split(' ') - intern = builder._intern_setdefault - if len(parts) == 3: - uri, localname, prefix = parts - prefix = intern(prefix, prefix) - qname = "%s:%s" % (prefix, localname) - qname = intern(qname, qname) - localname = intern(localname, localname) - else: - uri, localname = parts - prefix = EMPTY_PREFIX - qname = localname = intern(localname, localname) - return intern(uri, uri), localname, prefix, qname - - -class ExpatBuilder: - """Document builder that uses Expat to build a ParsedXML.DOM document - instance.""" - - def __init__(self, options=None): - if options is None: - options = xmlbuilder.Options() - self._options = options - if self._options.filter is not None: - self._filter = FilterVisibilityController(self._options.filter) - else: - self._filter = None - # This *really* doesn't do anything in this case, so - # override it with something fast & minimal. - self._finish_start_element = id - self._parser = None - self.reset() - - def createParser(self): - """Create a new parser object.""" - return expat.ParserCreate() - - def getParser(self): - """Return the parser object, creating a new one if needed.""" - if not self._parser: - self._parser = self.createParser() - self._intern_setdefault = self._parser.intern.setdefault - self._parser.buffer_text = True - self._parser.ordered_attributes = True - self._parser.specified_attributes = True - self.install(self._parser) - return self._parser - - def reset(self): - """Free all data structures used during DOM construction.""" - self.document = theDOMImplementation.createDocument( - EMPTY_NAMESPACE, None, None) - self.curNode = self.document - self._elem_info = self.document._elem_info - self._cdata = False - - def install(self, parser): - """Install the callbacks needed to build the DOM into the parser.""" - # This creates circular references! - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.first_element_handler - parser.EndElementHandler = self.end_element_handler - parser.ProcessingInstructionHandler = self.pi_handler - if self._options.entities: - parser.EntityDeclHandler = self.entity_decl_handler - parser.NotationDeclHandler = self.notation_decl_handler - if self._options.comments: - parser.CommentHandler = self.comment_handler - if self._options.cdata_sections: - parser.StartCdataSectionHandler = self.start_cdata_section_handler - parser.EndCdataSectionHandler = self.end_cdata_section_handler - parser.CharacterDataHandler = self.character_data_handler_cdata - else: - parser.CharacterDataHandler = self.character_data_handler - parser.ExternalEntityRefHandler = self.external_entity_ref_handler - parser.XmlDeclHandler = self.xml_decl_handler - parser.ElementDeclHandler = self.element_decl_handler - parser.AttlistDeclHandler = self.attlist_decl_handler - - def parseFile(self, file): - """Parse a document from a file object, returning the document - node.""" - parser = self.getParser() - first_buffer = True - try: - while 1: - buffer = file.read(16*1024) - if not buffer: - break - parser.Parse(buffer, 0) - if first_buffer and self.document.documentElement: - self._setup_subset(buffer) - first_buffer = False - parser.Parse("", True) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def parseString(self, string): - """Parse a document from a string, returning the document node.""" - parser = self.getParser() - try: - parser.Parse(string, True) - self._setup_subset(string) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def _setup_subset(self, buffer): - """Load the internal subset if there might be one.""" - if self.document.doctype: - extractor = InternalSubsetExtractor() - extractor.parseString(buffer) - subset = extractor.getSubset() - self.document.doctype.internalSubset = subset - - def start_doctype_decl_handler(self, doctypeName, systemId, publicId, - has_internal_subset): - doctype = self.document.implementation.createDocumentType( - doctypeName, publicId, systemId) - doctype.ownerDocument = self.document - self.document.childNodes.append(doctype) - self.document.doctype = doctype - if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: - self.document.doctype = None - del self.document.childNodes[-1] - doctype = None - self._parser.EntityDeclHandler = None - self._parser.NotationDeclHandler = None - if has_internal_subset: - if doctype is not None: - doctype.entities._seq = [] - doctype.notations._seq = [] - self._parser.CommentHandler = None - self._parser.ProcessingInstructionHandler = None - self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - - def end_doctype_decl_handler(self): - if self._options.comments: - self._parser.CommentHandler = self.comment_handler - self._parser.ProcessingInstructionHandler = self.pi_handler - if not (self._elem_info or self._filter): - self._finish_end_element = id - - def pi_handler(self, target, data): - node = self.document.createProcessingInstruction(target, data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def character_data_handler_cdata(self, data): - childNodes = self.curNode.childNodes - if self._cdata: - if ( self._cdata_continue - and childNodes[-1].nodeType == CDATA_SECTION_NODE): - childNodes[-1].appendData(data) - return - node = self.document.createCDATASection(data) - self._cdata_continue = True - elif childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - value = node.data + data - d = node.__dict__ - d['data'] = d['nodeValue'] = value - return - else: - node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = data - d['ownerDocument'] = self.document - _append_child(self.curNode, node) - - def character_data_handler(self, data): - childNodes = self.curNode.childNodes - if childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - return - node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - d['ownerDocument'] = self.document - _append_child(self.curNode, node) - - def entity_decl_handler(self, entityName, is_parameter_entity, value, - base, systemId, publicId, notationName): - if is_parameter_entity: - # we don't care about parameter entities for the DOM - return - if not self._options.entities: - return - node = self.document._create_entity(entityName, publicId, - systemId, notationName) - if value is not None: - # internal entity - # node *should* be readonly, but we'll cheat - child = self.document.createTextNode(value) - node.childNodes.append(child) - self.document.doctype.entities._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - del self.document.doctype.entities._seq[-1] - - def notation_decl_handler(self, notationName, base, systemId, publicId): - node = self.document._create_notation(notationName, publicId, systemId) - self.document.doctype.notations._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: - del self.document.doctype.notations._seq[-1] - - def comment_handler(self, data): - node = self.document.createComment(data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def start_cdata_section_handler(self): - self._cdata = True - self._cdata_continue = False - - def end_cdata_section_handler(self): - self._cdata = False - self._cdata_continue = False - - def external_entity_ref_handler(self, context, base, systemId, publicId): - return 1 - - def first_element_handler(self, name, attributes): - if self._filter is None and not self._elem_info: - self._finish_end_element = id - self.getParser().StartElementHandler = self.start_element_handler - self.start_element_handler(name, attributes) - - def start_element_handler(self, name, attributes): - node = self.document.createElement(name) - _append_child(self.curNode, node) - self.curNode = node - - if attributes: - for i in range(0, len(attributes), 2): - a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, - None, EMPTY_PREFIX) - value = attributes[i+1] - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['value'] = d['nodeValue'] = value - d['ownerDocument'] = self.document - _set_attribute_node(node, a) - - if node is not self.document.documentElement: - self._finish_start_element(node) - - def _finish_start_element(self, node): - if self._filter: - # To be general, we'd have to call isSameNode(), but this - # is sufficient for minidom: - if node is self.document.documentElement: - return - filt = self._filter.startContainer(node) - if filt == FILTER_REJECT: - # ignore this node & all descendents - Rejecter(self) - elif filt == FILTER_SKIP: - # ignore this node, but make it's children become - # children of the parent node - Skipper(self) - else: - return - self.curNode = node.parentNode - node.parentNode.removeChild(node) - node.unlink() - - # If this ever changes, Namespaces.end_element_handler() needs to - # be changed to match. - # - def end_element_handler(self, name): - curNode = self.curNode - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - def _finish_end_element(self, curNode): - info = self._elem_info.get(curNode.tagName) - if info: - self._handle_white_text_nodes(curNode, info) - if self._filter: - if curNode is self.document.documentElement: - return - if self._filter.acceptNode(curNode) == FILTER_REJECT: - self.curNode.removeChild(curNode) - curNode.unlink() - - def _handle_white_text_nodes(self, node, info): - if (self._options.whitespace_in_element_content - or not info.isElementContent()): - return - - # We have element type information and should remove ignorable - # whitespace; identify for text nodes which contain only - # whitespace. - L = [] - for child in node.childNodes: - if child.nodeType == TEXT_NODE and not child.data.strip(): - L.append(child) - - # Remove ignorable whitespace from the tree. - for child in L: - node.removeChild(child) - - def element_decl_handler(self, name, model): - info = self._elem_info.get(name) - if info is None: - self._elem_info[name] = ElementInfo(name, model) - else: - assert info._model is None - info._model = model - - def attlist_decl_handler(self, elem, name, type, default, required): - info = self._elem_info.get(elem) - if info is None: - info = ElementInfo(elem) - self._elem_info[elem] = info - info._attr_info.append( - [None, name, None, None, default, 0, type, required]) - - def xml_decl_handler(self, version, encoding, standalone): - self.document.version = version - self.document.encoding = encoding - # This is still a little ugly, thanks to the pyexpat API. ;-( - if standalone >= 0: - if standalone: - self.document.standalone = True - else: - self.document.standalone = False - - -# Don't include FILTER_INTERRUPT, since that's checked separately -# where allowed. -_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) - -class FilterVisibilityController(NewStyle): - """Wrapper around a DOMBuilderFilter which implements the checks - to make the whatToShow filter attribute work.""" - - __slots__ = 'filter', - - def __init__(self, filter): - self.filter = filter - - def startContainer(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.startContainer(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError, \ - "startContainer() returned illegal value: " + repr(val) - return val - else: - return FILTER_ACCEPT - - def acceptNode(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.acceptNode(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val == FILTER_SKIP: - # move all child nodes to the parent, and remove this node - parent = node.parentNode - for child in node.childNodes[:]: - parent.appendChild(child) - # node is handled by the caller - return FILTER_REJECT - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError, \ - "acceptNode() returned illegal value: " + repr(val) - return val - else: - return FILTER_ACCEPT - - _nodetype_mask = { - Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, - Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, - Node.TEXT_NODE: NodeFilter.SHOW_TEXT, - Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, - Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, - Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, - Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, - Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, - Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, - Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, - Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, - Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, - } - - -class FilterCrutch(NewStyle): - __slots__ = '_builder', '_level', '_old_start', '_old_end' - - def __init__(self, builder): - self._level = 0 - self._builder = builder - parser = builder._parser - self._old_start = parser.StartElementHandler - self._old_end = parser.EndElementHandler - parser.StartElementHandler = self.start_element_handler - parser.EndElementHandler = self.end_element_handler - -class Rejecter(FilterCrutch): - __slots__ = () - - def __init__(self, builder): - FilterCrutch.__init__(self, builder) - parser = builder._parser - for name in ("ProcessingInstructionHandler", - "CommentHandler", - "CharacterDataHandler", - "StartCdataSectionHandler", - "EndCdataSectionHandler", - "ExternalEntityRefHandler", - ): - setattr(parser, name, None) - - def start_element_handler(self, *args): - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # restore the old handlers - parser = self._builder._parser - self._builder.install(parser) - parser.StartElementHandler = self._old_start - parser.EndElementHandler = self._old_end - else: - self._level = self._level - 1 - -class Skipper(FilterCrutch): - __slots__ = () - - def start_element_handler(self, *args): - node = self._builder.curNode - self._old_start(*args) - if self._builder.curNode is not node: - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # We're popping back out of the node we're skipping, so we - # shouldn't need to do anything but reset the handlers. - self._builder._parser.StartElementHandler = self._old_start - self._builder._parser.EndElementHandler = self._old_end - self._builder = None - else: - self._level = self._level - 1 - self._old_end(*args) - - -# framework document used by the fragment builder. -# Takes a string for the doctype, subset string, and namespace attrs string. - -_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ - "http://xml.python.org/entities/fragment-builder/internal" - -_FRAGMENT_BUILDER_TEMPLATE = ( - '''\ -<!DOCTYPE wrapper - %%s [ - <!ENTITY fragment-builder-internal - SYSTEM "%s"> -%%s -]> -<wrapper %%s ->&fragment-builder-internal;</wrapper>''' - % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) - - -class FragmentBuilder(ExpatBuilder): - """Builder which constructs document fragments given XML source - text and a context node. - - The context node is expected to provide information about the - namespace declarations which are in scope at the start of the - fragment. - """ - - def __init__(self, context, options=None): - if context.nodeType == DOCUMENT_NODE: - self.originalDocument = context - self.context = context - else: - self.originalDocument = context.ownerDocument - self.context = context - ExpatBuilder.__init__(self, options) - - def reset(self): - ExpatBuilder.reset(self) - self.fragment = None - - def parseFile(self, file): - """Parse a document fragment from a file object, returning the - fragment node.""" - return self.parseString(file.read()) - - def parseString(self, string): - """Parse a document fragment from a string, returning the - fragment node.""" - self._source = string - parser = self.getParser() - doctype = self.originalDocument.doctype - ident = "" - if doctype: - subset = doctype.internalSubset or self._getDeclarations() - if doctype.publicId: - ident = ('PUBLIC "%s" "%s"' - % (doctype.publicId, doctype.systemId)) - elif doctype.systemId: - ident = 'SYSTEM "%s"' % doctype.systemId - else: - subset = "" - nsattrs = self._getNSattrs() # get ns decls from node's ancestors - document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) - try: - parser.Parse(document, 1) - except: - self.reset() - raise - fragment = self.fragment - self.reset() -## self._parser = None - return fragment - - def _getDeclarations(self): - """Re-create the internal subset from the DocumentType node. - - This is only needed if we don't already have the - internalSubset as a string. - """ - doctype = self.context.ownerDocument.doctype - s = "" - if doctype: - for i in range(doctype.notations.length): - notation = doctype.notations.item(i) - if s: - s = s + "\n " - s = "%s<!NOTATION %s" % (s, notation.nodeName) - if notation.publicId: - s = '%s PUBLIC "%s"\n "%s">' \ - % (s, notation.publicId, notation.systemId) - else: - s = '%s SYSTEM "%s">' % (s, notation.systemId) - for i in range(doctype.entities.length): - entity = doctype.entities.item(i) - if s: - s = s + "\n " - s = "%s<!ENTITY %s" % (s, entity.nodeName) - if entity.publicId: - s = '%s PUBLIC "%s"\n "%s"' \ - % (s, entity.publicId, entity.systemId) - elif entity.systemId: - s = '%s SYSTEM "%s"' % (s, entity.systemId) - else: - s = '%s "%s"' % (s, entity.firstChild.data) - if entity.notationName: - s = "%s NOTATION %s" % (s, entity.notationName) - s = s + ">" - return s - - def _getNSattrs(self): - return "" - - def external_entity_ref_handler(self, context, base, systemId, publicId): - if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: - # this entref is the one that we made to put the subtree - # in; all of our given input is parsed in here. - old_document = self.document - old_cur_node = self.curNode - parser = self._parser.ExternalEntityParserCreate(context) - # put the real document back, parse into the fragment to return - self.document = self.originalDocument - self.fragment = self.document.createDocumentFragment() - self.curNode = self.fragment - try: - parser.Parse(self._source, 1) - finally: - self.curNode = old_cur_node - self.document = old_document - self._source = None - return -1 - else: - return ExpatBuilder.external_entity_ref_handler( - self, context, base, systemId, publicId) - - -class Namespaces: - """Mix-in class for builders; adds support for namespaces.""" - - def _initNamespaces(self): - # list of (prefix, uri) ns declarations. Namespace attrs are - # constructed from this and added to the element's attrs. - self._ns_ordered_prefixes = [] - - def createParser(self): - """Create a new namespace-handling parser.""" - parser = expat.ParserCreate(namespace_separator=" ") - parser.namespace_prefixes = True - return parser - - def install(self, parser): - """Insert the namespace-handlers onto the parser.""" - ExpatBuilder.install(self, parser) - if self._options.namespace_declarations: - parser.StartNamespaceDeclHandler = ( - self.start_namespace_decl_handler) - - def start_namespace_decl_handler(self, prefix, uri): - """Push this namespace declaration on our storage.""" - self._ns_ordered_prefixes.append((prefix, uri)) - - def start_element_handler(self, name, attributes): - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - else: - uri = EMPTY_NAMESPACE - qname = name - localname = None - prefix = EMPTY_PREFIX - node = minidom.Element(qname, uri, prefix, localname) - node.ownerDocument = self.document - _append_child(self.curNode, node) - self.curNode = node - - if self._ns_ordered_prefixes: - for prefix, uri in self._ns_ordered_prefixes: - if prefix: - a = minidom.Attr(_intern(self, 'xmlns:' + prefix), - XMLNS_NAMESPACE, prefix, "xmlns") - else: - a = minidom.Attr("xmlns", XMLNS_NAMESPACE, - "xmlns", EMPTY_PREFIX) - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = uri - d = a.__dict__ - d['value'] = d['nodeValue'] = uri - d['ownerDocument'] = self.document - _set_attribute_node(node, a) - del self._ns_ordered_prefixes[:] - - if attributes: - _attrs = node._attrs - _attrsNS = node._attrsNS - for i in range(0, len(attributes), 2): - aname = attributes[i] - value = attributes[i+1] - if ' ' in aname: - uri, localname, prefix, qname = _parse_ns_name(self, aname) - a = minidom.Attr(qname, uri, localname, prefix) - _attrs[qname] = a - _attrsNS[(uri, localname)] = a - else: - a = minidom.Attr(aname, EMPTY_NAMESPACE, - aname, EMPTY_PREFIX) - _attrs[aname] = a - _attrsNS[(EMPTY_NAMESPACE, aname)] = a - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['ownerDocument'] = self.document - d['value'] = d['nodeValue'] = value - d['ownerElement'] = node - - if __debug__: - # This only adds some asserts to the original - # end_element_handler(), so we only define this when -O is not - # used. If changing one, be sure to check the other to see if - # it needs to be changed as well. - # - def end_element_handler(self, name): - curNode = self.curNode - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - assert (curNode.namespaceURI == uri - and curNode.localName == localname - and curNode.prefix == prefix), \ - "element stack messed up! (namespace)" - else: - assert curNode.nodeName == name, \ - "element stack messed up - bad nodeName" - assert curNode.namespaceURI == EMPTY_NAMESPACE, \ - "element stack messed up - bad namespaceURI" - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - -class ExpatBuilderNS(Namespaces, ExpatBuilder): - """Document builder that supports namespaces.""" - - def reset(self): - ExpatBuilder.reset(self) - self._initNamespaces() - - -class FragmentBuilderNS(Namespaces, FragmentBuilder): - """Fragment builder that supports namespaces.""" - - def reset(self): - FragmentBuilder.reset(self) - self._initNamespaces() - - def _getNSattrs(self): - """Return string of namespace attributes from this element and - ancestors.""" - # XXX This needs to be re-written to walk the ancestors of the - # context to build up the namespace information from - # declarations, elements, and attributes found in context. - # Otherwise we have to store a bunch more data on the DOM - # (though that *might* be more reliable -- not clear). - attrs = "" - context = self.context - L = [] - while context: - if hasattr(context, '_ns_prefix_uri'): - for prefix, uri in context._ns_prefix_uri.items(): - # add every new NS decl from context to L and attrs string - if prefix in L: - continue - L.append(prefix) - if prefix: - declname = "xmlns:" + prefix - else: - declname = "xmlns" - if attrs: - attrs = "%s\n %s='%s'" % (attrs, declname, uri) - else: - attrs = " %s='%s'" % (declname, uri) - context = context.parentNode - return attrs - - -class ParseEscape(Exception): - """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" - pass - -class InternalSubsetExtractor(ExpatBuilder): - """XML processor which can rip out the internal document type subset.""" - - subset = None - - def getSubset(self): - """Return the internal subset as a string.""" - return self.subset - - def parseFile(self, file): - try: - ExpatBuilder.parseFile(self, file) - except ParseEscape: - pass - - def parseString(self, string): - try: - ExpatBuilder.parseString(self, string) - except ParseEscape: - pass - - def install(self, parser): - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.start_element_handler - - def start_doctype_decl_handler(self, name, publicId, systemId, - has_internal_subset): - if has_internal_subset: - parser = self.getParser() - self.subset = [] - parser.DefaultHandler = self.subset.append - parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - else: - raise ParseEscape() - - def end_doctype_decl_handler(self): - s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') - self.subset = s - raise ParseEscape() - - def start_element_handler(self, name, attrs): - raise ParseEscape() - - -def parse(file, namespaces=1): - """Parse a document, returning the resulting Document node. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - - if isinstance(file, StringTypes): - fp = open(file, 'rb') - try: - result = builder.parseFile(fp) - finally: - fp.close() - else: - result = builder.parseFile(file) - return result - - -def parseString(string, namespaces=1): - """Parse a document from a string, returning the resulting - Document node. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - return builder.parseString(string) - - -def parseFragment(file, context, namespaces=1): - """Parse a fragment of a document, given the context from which it - was originally extracted. context should be the parent of the - node(s) which are in the fragment. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - - if isinstance(file, StringTypes): - fp = open(file, 'rb') - try: - result = builder.parseFile(fp) - finally: - fp.close() - else: - result = builder.parseFile(file) - return result - - -def parseFragmentString(string, context, namespaces=1): - """Parse a fragment of a document from a string, given the context - from which it was originally extracted. context should be the - parent of the node(s) which are in the fragment. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - return builder.parseString(string) - - -def makeBuilder(options): - """Create a builder based on an Options object.""" - if options.namespaces: - return ExpatBuilderNS(options) - else: - return ExpatBuilder(options) diff --git a/Lib/xml/dom/minicompat.py b/Lib/xml/dom/minicompat.py deleted file mode 100644 index 9f2f8f7..0000000 --- a/Lib/xml/dom/minicompat.py +++ /dev/null @@ -1,184 +0,0 @@ -"""Python version compatibility support for minidom.""" - -# This module should only be imported using "import *". -# -# The following names are defined: -# -# isinstance -- version of the isinstance() function that accepts -# tuples as the second parameter regardless of the -# Python version -# -# NodeList -- lightest possible NodeList implementation -# -# EmptyNodeList -- lightest possible NodeList that is guarateed to -# remain empty (immutable) -# -# StringTypes -- tuple of defined string types -# -# GetattrMagic -- base class used to make _get_<attr> be magically -# invoked when available -# defproperty -- function used in conjunction with GetattrMagic; -# using these together is needed to make them work -# as efficiently as possible in both Python 2.2+ -# and older versions. For example: -# -# class MyClass(GetattrMagic): -# def _get_myattr(self): -# return something -# -# defproperty(MyClass, "myattr", -# "return some value") -# -# For Python 2.2 and newer, this will construct a -# property object on the class, which avoids -# needing to override __getattr__(). It will only -# work for read-only attributes. -# -# For older versions of Python, inheriting from -# GetattrMagic will use the traditional -# __getattr__() hackery to achieve the same effect, -# but less efficiently. -# -# defproperty() should be used for each version of -# the relevant _get_<property>() function. -# -# NewStyle -- base class to cause __slots__ to be honored in -# the new world -# -# True, False -- only for Python 2.2 and earlier - -__all__ = ["NodeList", "EmptyNodeList", "NewStyle", - "StringTypes", "defproperty", "GetattrMagic"] - -import xml.dom - -try: - unicode -except NameError: - StringTypes = type(''), -else: - StringTypes = type(''), type(unicode('')) - - -# define True and False only if not defined as built-ins -try: - True -except NameError: - True = 1 - False = 0 - __all__.extend(["True", "False"]) - - -try: - isinstance('', StringTypes) -except TypeError: - # - # Wrap isinstance() to make it compatible with the version in - # Python 2.2 and newer. - # - _isinstance = isinstance - def isinstance(obj, type_or_seq): - try: - return _isinstance(obj, type_or_seq) - except TypeError: - for t in type_or_seq: - if _isinstance(obj, t): - return 1 - return 0 - __all__.append("isinstance") - - -if list is type([]): - class NodeList(list): - __slots__ = () - - def item(self, index): - if 0 <= index < len(self): - return self[index] - - def _get_length(self): - return len(self) - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - - def __getstate__(self): - return list(self) - - def __setstate__(self, state): - self[:] = state - - class EmptyNodeList(tuple): - __slots__ = () - - def __add__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def __radd__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def item(self, index): - return None - - def _get_length(self): - return 0 - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - -else: - def NodeList(): - return [] - - def EmptyNodeList(): - return [] - - -try: - property -except NameError: - def defproperty(klass, name, doc): - # taken care of by the base __getattr__() - pass - - class GetattrMagic: - def __getattr__(self, key): - if key.startswith("_"): - raise AttributeError, key - - try: - get = getattr(self, "_get_" + key) - except AttributeError: - raise AttributeError, key - return get() - - class NewStyle: - pass - -else: - def defproperty(klass, name, doc): - get = getattr(klass, ("_get_" + name)).im_func - def set(self, value, name=name): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute " + repr(name)) - assert not hasattr(klass, "_set_" + name), \ - "expected not to find _set_" + name - prop = property(get, set, doc=doc) - setattr(klass, name, prop) - - class GetattrMagic: - pass - - NewStyle = object diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py deleted file mode 100644 index 84be99b..0000000 --- a/Lib/xml/dom/minidom.py +++ /dev/null @@ -1,1938 +0,0 @@ -"""\ -minidom.py -- a lightweight DOM implementation. - -parse("foo.xml") - -parseString("<foo><bar/></foo>") - -Todo: -===== - * convenience methods for getting elements and text. - * more testing - * bring some of the writer and linearizer code into conformance with this - interface - * SAX 2 namespaces -""" - -import xml.dom - -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg -from xml.dom.minicompat import * -from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS - -_TupleType = type(()) - -# This is used by the ID-cache invalidation checks; the list isn't -# actually complete, since the nodes being checked will never be the -# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is -# the node being added or removed, not the node being modified.) -# -_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, - xml.dom.Node.ENTITY_REFERENCE_NODE) - - -class Node(xml.dom.Node, GetattrMagic): - namespaceURI = None # this is non-null only for elements and attributes - parentNode = None - ownerDocument = None - nextSibling = None - previousSibling = None - - prefix = EMPTY_PREFIX # non-null only for NS elements and attributes - - def __nonzero__(self): - return True - - def toxml(self, encoding = None): - return self.toprettyxml("", "", encoding) - - def toprettyxml(self, indent="\t", newl="\n", encoding = None): - # indent = the indentation string to prepend, per level - # newl = the newline string to append - writer = _get_StringIO() - if encoding is not None: - import codecs - # Can't use codecs.getwriter to preserve 2.0 compatibility - writer = codecs.lookup(encoding)[3](writer) - if self.nodeType == Node.DOCUMENT_NODE: - # Can pass encoding only to document, to put it into XML header - self.writexml(writer, "", indent, newl, encoding) - else: - self.writexml(writer, "", indent, newl) - return writer.getvalue() - - def hasChildNodes(self): - if self.childNodes: - return True - else: - return False - - def _get_childNodes(self): - return self.childNodes - - def _get_firstChild(self): - if self.childNodes: - return self.childNodes[0] - - def _get_lastChild(self): - if self.childNodes: - return self.childNodes[-1] - - def insertBefore(self, newChild, refChild): - if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(newChild.childNodes): - self.insertBefore(c, refChild) - ### The DOM does not clearly specify what to return in this case - return newChild - if newChild.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(newChild), repr(self))) - if newChild.parentNode is not None: - newChild.parentNode.removeChild(newChild) - if refChild is None: - self.appendChild(newChild) - else: - try: - index = self.childNodes.index(refChild) - except ValueError: - raise xml.dom.NotFoundErr() - if newChild.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - self.childNodes.insert(index, newChild) - newChild.nextSibling = refChild - refChild.previousSibling = newChild - if index: - node = self.childNodes[index-1] - node.nextSibling = newChild - newChild.previousSibling = node - else: - newChild.previousSibling = None - newChild.parentNode = self - return newChild - - def appendChild(self, node): - if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(node.childNodes): - self.appendChild(c) - ### The DOM does not clearly specify what to return in this case - return node - if node.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - elif node.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - if node.parentNode is not None: - node.parentNode.removeChild(node) - _append_child(self, node) - node.nextSibling = None - return node - - def replaceChild(self, newChild, oldChild): - if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: - refChild = oldChild.nextSibling - self.removeChild(oldChild) - return self.insertBefore(newChild, refChild) - if newChild.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(newChild), repr(self))) - if newChild is oldChild: - return - if newChild.parentNode is not None: - newChild.parentNode.removeChild(newChild) - try: - index = self.childNodes.index(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - self.childNodes[index] = newChild - newChild.parentNode = self - oldChild.parentNode = None - if (newChild.nodeType in _nodeTypes_with_children - or oldChild.nodeType in _nodeTypes_with_children): - _clear_id_cache(self) - newChild.nextSibling = oldChild.nextSibling - newChild.previousSibling = oldChild.previousSibling - oldChild.nextSibling = None - oldChild.previousSibling = None - if newChild.previousSibling: - newChild.previousSibling.nextSibling = newChild - if newChild.nextSibling: - newChild.nextSibling.previousSibling = newChild - return oldChild - - def removeChild(self, oldChild): - try: - self.childNodes.remove(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - if oldChild.nextSibling is not None: - oldChild.nextSibling.previousSibling = oldChild.previousSibling - if oldChild.previousSibling is not None: - oldChild.previousSibling.nextSibling = oldChild.nextSibling - oldChild.nextSibling = oldChild.previousSibling = None - if oldChild.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - - oldChild.parentNode = None - return oldChild - - def normalize(self): - L = [] - for child in self.childNodes: - if child.nodeType == Node.TEXT_NODE: - data = child.data - if data and L and L[-1].nodeType == child.nodeType: - # collapse text node - node = L[-1] - node.data = node.data + child.data - node.nextSibling = child.nextSibling - child.unlink() - elif data: - if L: - L[-1].nextSibling = child - child.previousSibling = L[-1] - else: - child.previousSibling = None - L.append(child) - else: - # empty text node; discard - child.unlink() - else: - if L: - L[-1].nextSibling = child - child.previousSibling = L[-1] - else: - child.previousSibling = None - L.append(child) - if child.nodeType == Node.ELEMENT_NODE: - child.normalize() - self.childNodes[:] = L - - def cloneNode(self, deep): - return _clone_node(self, deep, self.ownerDocument or self) - - def isSupported(self, feature, version): - return self.ownerDocument.implementation.hasFeature(feature, version) - - def _get_localName(self): - # Overridden in Element and Attr where localName can be Non-Null - return None - - # Node interfaces from Level 3 (WD 9 April 2002) - - def isSameNode(self, other): - return self is other - - def getInterface(self, feature): - if self.isSupported(feature, None): - return self - else: - return None - - # The "user data" functions use a dictionary that is only present - # if some user data has been set, so be careful not to assume it - # exists. - - def getUserData(self, key): - try: - return self._user_data[key][0] - except (AttributeError, KeyError): - return None - - def setUserData(self, key, data, handler): - old = None - try: - d = self._user_data - except AttributeError: - d = {} - self._user_data = d - if d.has_key(key): - old = d[key][0] - if data is None: - # ignore handlers passed for None - handler = None - if old is not None: - del d[key] - else: - d[key] = (data, handler) - return old - - def _call_user_data_handler(self, operation, src, dst): - if hasattr(self, "_user_data"): - for key, (data, handler) in self._user_data.items(): - if handler is not None: - handler.handle(operation, key, data, src, dst) - - # minidom-specific API: - - def unlink(self): - self.parentNode = self.ownerDocument = None - if self.childNodes: - for child in self.childNodes: - child.unlink() - self.childNodes = NodeList() - self.previousSibling = None - self.nextSibling = None - -defproperty(Node, "firstChild", doc="First child node, or None.") -defproperty(Node, "lastChild", doc="Last child node, or None.") -defproperty(Node, "localName", doc="Namespace-local name of this node.") - - -def _append_child(self, node): - # fast path with less checks; usable by DOM builders if careful - childNodes = self.childNodes - if childNodes: - last = childNodes[-1] - node.__dict__["previousSibling"] = last - last.__dict__["nextSibling"] = node - childNodes.append(node) - node.__dict__["parentNode"] = self - -def _in_document(node): - # return True iff node is part of a document tree - while node is not None: - if node.nodeType == Node.DOCUMENT_NODE: - return True - node = node.parentNode - return False - -def _write_data(writer, data): - "Writes datachars to writer." - data = data.replace("&", "&").replace("<", "<") - data = data.replace("\"", """).replace(">", ">") - writer.write(data) - -def _get_elements_by_tagName_helper(parent, name, rc): - for node in parent.childNodes: - if node.nodeType == Node.ELEMENT_NODE and \ - (name == "*" or node.tagName == name): - rc.append(node) - _get_elements_by_tagName_helper(node, name, rc) - return rc - -def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): - for node in parent.childNodes: - if node.nodeType == Node.ELEMENT_NODE: - if ((localName == "*" or node.localName == localName) and - (nsURI == "*" or node.namespaceURI == nsURI)): - rc.append(node) - _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) - return rc - -class DocumentFragment(Node): - nodeType = Node.DOCUMENT_FRAGMENT_NODE - nodeName = "#document-fragment" - nodeValue = None - attributes = None - parentNode = None - _child_node_types = (Node.ELEMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.NOTATION_NODE) - - def __init__(self): - self.childNodes = NodeList() - - -class Attr(Node): - nodeType = Node.ATTRIBUTE_NODE - attributes = None - ownerElement = None - specified = False - _is_id = False - - _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) - - def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, - prefix=None): - # skip setattr for performance - d = self.__dict__ - d["nodeName"] = d["name"] = qName - d["namespaceURI"] = namespaceURI - d["prefix"] = prefix - d['childNodes'] = NodeList() - - # Add the single child node that represents the value of the attr - self.childNodes.append(Text()) - - # nodeValue and value are set elsewhere - - def _get_localName(self): - return self.nodeName.split(":", 1)[-1] - - def _get_name(self): - return self.name - - def _get_specified(self): - return self.specified - - def __setattr__(self, name, value): - d = self.__dict__ - if name in ("value", "nodeValue"): - d["value"] = d["nodeValue"] = value - d2 = self.childNodes[0].__dict__ - d2["data"] = d2["nodeValue"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - elif name in ("name", "nodeName"): - d["name"] = d["nodeName"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - else: - d[name] = value - - def _set_prefix(self, prefix): - nsuri = self.namespaceURI - if prefix == "xmlns": - if nsuri and nsuri != XMLNS_NAMESPACE: - raise xml.dom.NamespaceErr( - "illegal use of 'xmlns' prefix for the wrong namespace") - d = self.__dict__ - d['prefix'] = prefix - if prefix is None: - newName = self.localName - else: - newName = "%s:%s" % (prefix, self.localName) - if self.ownerElement: - _clear_id_cache(self.ownerElement) - d['nodeName'] = d['name'] = newName - - def _set_value(self, value): - d = self.__dict__ - d['value'] = d['nodeValue'] = value - if self.ownerElement: - _clear_id_cache(self.ownerElement) - self.childNodes[0].data = value - - def unlink(self): - # This implementation does not call the base implementation - # since most of that is not needed, and the expense of the - # method call is not warranted. We duplicate the removal of - # children, but that's all we needed from the base class. - elem = self.ownerElement - if elem is not None: - del elem._attrs[self.nodeName] - del elem._attrsNS[(self.namespaceURI, self.localName)] - if self._is_id: - self._is_id = False - elem._magic_id_nodes -= 1 - self.ownerDocument._magic_id_count -= 1 - for child in self.childNodes: - child.unlink() - del self.childNodes[:] - - def _get_isId(self): - if self._is_id: - return True - doc = self.ownerDocument - elem = self.ownerElement - if doc is None or elem is None: - return False - - info = doc._get_elem_info(elem) - if info is None: - return False - if self.namespaceURI: - return info.isIdNS(self.namespaceURI, self.localName) - else: - return info.isId(self.nodeName) - - def _get_schemaType(self): - doc = self.ownerDocument - elem = self.ownerElement - if doc is None or elem is None: - return _no_type - - info = doc._get_elem_info(elem) - if info is None: - return _no_type - if self.namespaceURI: - return info.getAttributeTypeNS(self.namespaceURI, self.localName) - else: - return info.getAttributeType(self.nodeName) - -defproperty(Attr, "isId", doc="True if this attribute is an ID.") -defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") -defproperty(Attr, "schemaType", doc="Schema type for this attribute.") - - -class NamedNodeMap(NewStyle, GetattrMagic): - """The attribute list is a transient interface to the underlying - dictionaries. Mutations here will change the underlying element's - dictionary. - - Ordering is imposed artificially and does not reflect the order of - attributes as found in an input document. - """ - - __slots__ = ('_attrs', '_attrsNS', '_ownerElement') - - def __init__(self, attrs, attrsNS, ownerElement): - self._attrs = attrs - self._attrsNS = attrsNS - self._ownerElement = ownerElement - - def _get_length(self): - return len(self._attrs) - - def item(self, index): - try: - return self[self._attrs.keys()[index]] - except IndexError: - return None - - def items(self): - L = [] - for node in self._attrs.values(): - L.append((node.nodeName, node.value)) - return L - - def itemsNS(self): - L = [] - for node in self._attrs.values(): - L.append(((node.namespaceURI, node.localName), node.value)) - return L - - def has_key(self, key): - if isinstance(key, StringTypes): - return self._attrs.has_key(key) - else: - return self._attrsNS.has_key(key) - - def keys(self): - return self._attrs.keys() - - def keysNS(self): - return self._attrsNS.keys() - - def values(self): - return self._attrs.values() - - def get(self, name, value=None): - return self._attrs.get(name, value) - - __len__ = _get_length - - def __cmp__(self, other): - if self._attrs is getattr(other, "_attrs", None): - return 0 - else: - return cmp(id(self), id(other)) - - def __getitem__(self, attname_or_tuple): - if isinstance(attname_or_tuple, _TupleType): - return self._attrsNS[attname_or_tuple] - else: - return self._attrs[attname_or_tuple] - - # same as set - def __setitem__(self, attname, value): - if isinstance(value, StringTypes): - try: - node = self._attrs[attname] - except KeyError: - node = Attr(attname) - node.ownerDocument = self._ownerElement.ownerDocument - self.setNamedItem(node) - node.value = value - else: - if not isinstance(value, Attr): - raise TypeError, "value must be a string or Attr object" - node = value - self.setNamedItem(node) - - def getNamedItem(self, name): - try: - return self._attrs[name] - except KeyError: - return None - - def getNamedItemNS(self, namespaceURI, localName): - try: - return self._attrsNS[(namespaceURI, localName)] - except KeyError: - return None - - def removeNamedItem(self, name): - n = self.getNamedItem(name) - if n is not None: - _clear_id_cache(self._ownerElement) - del self._attrs[n.nodeName] - del self._attrsNS[(n.namespaceURI, n.localName)] - if n.__dict__.has_key('ownerElement'): - n.__dict__['ownerElement'] = None - return n - else: - raise xml.dom.NotFoundErr() - - def removeNamedItemNS(self, namespaceURI, localName): - n = self.getNamedItemNS(namespaceURI, localName) - if n is not None: - _clear_id_cache(self._ownerElement) - del self._attrsNS[(n.namespaceURI, n.localName)] - del self._attrs[n.nodeName] - if n.__dict__.has_key('ownerElement'): - n.__dict__['ownerElement'] = None - return n - else: - raise xml.dom.NotFoundErr() - - def setNamedItem(self, node): - if not isinstance(node, Attr): - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - old = self._attrs.get(node.name) - if old: - old.unlink() - self._attrs[node.name] = node - self._attrsNS[(node.namespaceURI, node.localName)] = node - node.ownerElement = self._ownerElement - _clear_id_cache(node.ownerElement) - return old - - def setNamedItemNS(self, node): - return self.setNamedItem(node) - - def __delitem__(self, attname_or_tuple): - node = self[attname_or_tuple] - _clear_id_cache(node.ownerElement) - node.unlink() - - def __getstate__(self): - return self._attrs, self._attrsNS, self._ownerElement - - def __setstate__(self, state): - self._attrs, self._attrsNS, self._ownerElement = state - -defproperty(NamedNodeMap, "length", - doc="Number of nodes in the NamedNodeMap.") - -AttributeList = NamedNodeMap - - -class TypeInfo(NewStyle): - __slots__ = 'namespace', 'name' - - def __init__(self, namespace, name): - self.namespace = namespace - self.name = name - - def __repr__(self): - if self.namespace: - return "<TypeInfo %r (from %r)>" % (self.name, self.namespace) - else: - return "<TypeInfo %r>" % self.name - - def _get_name(self): - return self.name - - def _get_namespace(self): - return self.namespace - -_no_type = TypeInfo(None, None) - -class Element(Node): - nodeType = Node.ELEMENT_NODE - nodeValue = None - schemaType = _no_type - - _magic_id_nodes = 0 - - _child_node_types = (Node.ELEMENT_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE) - - def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, - localName=None): - self.tagName = self.nodeName = tagName - self.prefix = prefix - self.namespaceURI = namespaceURI - self.childNodes = NodeList() - - self._attrs = {} # attributes are double-indexed: - self._attrsNS = {} # tagName -> Attribute - # URI,localName -> Attribute - # in the future: consider lazy generation - # of attribute objects this is too tricky - # for now because of headaches with - # namespaces. - - def _get_localName(self): - return self.tagName.split(":", 1)[-1] - - def _get_tagName(self): - return self.tagName - - def unlink(self): - for attr in self._attrs.values(): - attr.unlink() - self._attrs = None - self._attrsNS = None - Node.unlink(self) - - def getAttribute(self, attname): - try: - return self._attrs[attname].value - except KeyError: - return "" - - def getAttributeNS(self, namespaceURI, localName): - try: - return self._attrsNS[(namespaceURI, localName)].value - except KeyError: - return "" - - def setAttribute(self, attname, value): - attr = self.getAttributeNode(attname) - if attr is None: - attr = Attr(attname) - # for performance - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) - elif value != attr.value: - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - if attr.isId: - _clear_id_cache(self) - - def setAttributeNS(self, namespaceURI, qualifiedName, value): - prefix, localname = _nssplit(qualifiedName) - attr = self.getAttributeNodeNS(namespaceURI, localname) - if attr is None: - # for performance - attr = Attr(qualifiedName, namespaceURI, localname, prefix) - d = attr.__dict__ - d["prefix"] = prefix - d["nodeName"] = qualifiedName - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) - else: - d = attr.__dict__ - if value != attr.value: - d["value"] = d["nodeValue"] = value - if attr.isId: - _clear_id_cache(self) - if attr.prefix != prefix: - d["prefix"] = prefix - d["nodeName"] = qualifiedName - - def getAttributeNode(self, attrname): - return self._attrs.get(attrname) - - def getAttributeNodeNS(self, namespaceURI, localName): - return self._attrsNS.get((namespaceURI, localName)) - - def setAttributeNode(self, attr): - if attr.ownerElement not in (None, self): - raise xml.dom.InuseAttributeErr("attribute node already owned") - old1 = self._attrs.get(attr.name, None) - if old1 is not None: - self.removeAttributeNode(old1) - old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) - if old2 is not None and old2 is not old1: - self.removeAttributeNode(old2) - _set_attribute_node(self, attr) - - if old1 is not attr: - # It might have already been part of this node, in which case - # it doesn't represent a change, and should not be returned. - return old1 - if old2 is not attr: - return old2 - - setAttributeNodeNS = setAttributeNode - - def removeAttribute(self, name): - try: - attr = self._attrs[name] - except KeyError: - raise xml.dom.NotFoundErr() - self.removeAttributeNode(attr) - - def removeAttributeNS(self, namespaceURI, localName): - try: - attr = self._attrsNS[(namespaceURI, localName)] - except KeyError: - raise xml.dom.NotFoundErr() - self.removeAttributeNode(attr) - - def removeAttributeNode(self, node): - if node is None: - raise xml.dom.NotFoundErr() - try: - self._attrs[node.name] - except KeyError: - raise xml.dom.NotFoundErr() - _clear_id_cache(self) - node.unlink() - # Restore this since the node is still useful and otherwise - # unlinked - node.ownerDocument = self.ownerDocument - - removeAttributeNodeNS = removeAttributeNode - - def hasAttribute(self, name): - return self._attrs.has_key(name) - - def hasAttributeNS(self, namespaceURI, localName): - return self._attrsNS.has_key((namespaceURI, localName)) - - def getElementsByTagName(self, name): - return _get_elements_by_tagName_helper(self, name, NodeList()) - - def getElementsByTagNameNS(self, namespaceURI, localName): - return _get_elements_by_tagName_ns_helper( - self, namespaceURI, localName, NodeList()) - - def __repr__(self): - return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) - - def writexml(self, writer, indent="", addindent="", newl=""): - # indent = current indentation - # addindent = indentation to add to higher levels - # newl = newline string - writer.write(indent+"<" + self.tagName) - - attrs = self._get_attributes() - a_names = attrs.keys() - a_names.sort() - - for a_name in a_names: - writer.write(" %s=\"" % a_name) - _write_data(writer, attrs[a_name].value) - writer.write("\"") - if self.childNodes: - writer.write(">%s"%(newl)) - for node in self.childNodes: - node.writexml(writer,indent+addindent,addindent,newl) - writer.write("%s</%s>%s" % (indent,self.tagName,newl)) - else: - writer.write("/>%s"%(newl)) - - def _get_attributes(self): - return NamedNodeMap(self._attrs, self._attrsNS, self) - - def hasAttributes(self): - if self._attrs: - return True - else: - return False - - # DOM Level 3 attributes, based on the 22 Oct 2002 draft - - def setIdAttribute(self, name): - idAttr = self.getAttributeNode(name) - self.setIdAttributeNode(idAttr) - - def setIdAttributeNS(self, namespaceURI, localName): - idAttr = self.getAttributeNodeNS(namespaceURI, localName) - self.setIdAttributeNode(idAttr) - - def setIdAttributeNode(self, idAttr): - if idAttr is None or not self.isSameNode(idAttr.ownerElement): - raise xml.dom.NotFoundErr() - if _get_containing_entref(self) is not None: - raise xml.dom.NoModificationAllowedErr() - if not idAttr._is_id: - idAttr.__dict__['_is_id'] = True - self._magic_id_nodes += 1 - self.ownerDocument._magic_id_count += 1 - _clear_id_cache(self) - -defproperty(Element, "attributes", - doc="NamedNodeMap of attributes on the element.") -defproperty(Element, "localName", - doc="Namespace-local name of this element.") - - -def _set_attribute_node(element, attr): - _clear_id_cache(element) - element._attrs[attr.name] = attr - element._attrsNS[(attr.namespaceURI, attr.localName)] = attr - - # This creates a circular reference, but Element.unlink() - # breaks the cycle since the references to the attribute - # dictionaries are tossed. - attr.__dict__['ownerElement'] = element - - -class Childless: - """Mixin that makes childless-ness easy to implement and avoids - the complexity of the Node methods that deal with children. - """ - - attributes = None - childNodes = EmptyNodeList() - firstChild = None - lastChild = None - - def _get_firstChild(self): - return None - - def _get_lastChild(self): - return None - - def appendChild(self, node): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes cannot have children") - - def hasChildNodes(self): - return False - - def insertBefore(self, newChild, refChild): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes do not have children") - - def removeChild(self, oldChild): - raise xml.dom.NotFoundErr( - self.nodeName + " nodes do not have children") - - def replaceChild(self, newChild, oldChild): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes do not have children") - - -class ProcessingInstruction(Childless, Node): - nodeType = Node.PROCESSING_INSTRUCTION_NODE - - def __init__(self, target, data): - self.target = self.nodeName = target - self.data = self.nodeValue = data - - def _get_data(self): - return self.data - def _set_data(self, value): - d = self.__dict__ - d['data'] = d['nodeValue'] = value - - def _get_target(self): - return self.target - def _set_target(self, value): - d = self.__dict__ - d['target'] = d['nodeName'] = value - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - elif name == "target" or name == "nodeName": - self.__dict__['target'] = self.__dict__['nodeName'] = value - else: - self.__dict__[name] = value - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) - - -class CharacterData(Childless, Node): - def _get_length(self): - return len(self.data) - __len__ = _get_length - - def _get_data(self): - return self.__dict__['data'] - def _set_data(self, data): - d = self.__dict__ - d['data'] = d['nodeValue'] = data - - _get_nodeValue = _get_data - _set_nodeValue = _set_data - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - else: - self.__dict__[name] = value - - def __repr__(self): - data = self.data - if len(data) > 10: - dotdotdot = "..." - else: - dotdotdot = "" - return "<DOM %s node \"%s%s\">" % ( - self.__class__.__name__, data[0:10], dotdotdot) - - def substringData(self, offset, count): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - return self.data[offset:offset+count] - - def appendData(self, arg): - self.data = self.data + arg - - def insertData(self, offset, arg): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if arg: - self.data = "%s%s%s" % ( - self.data[:offset], arg, self.data[offset:]) - - def deleteData(self, offset, count): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - if count: - self.data = self.data[:offset] + self.data[offset+count:] - - def replaceData(self, offset, count, arg): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - if count: - self.data = "%s%s%s" % ( - self.data[:offset], arg, self.data[offset+count:]) - -defproperty(CharacterData, "length", doc="Length of the string data.") - - -class Text(CharacterData): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, CharacterData is an old-style class - # __slots__ = () - - nodeType = Node.TEXT_NODE - nodeName = "#text" - attributes = None - - def splitText(self, offset): - if offset < 0 or offset > len(self.data): - raise xml.dom.IndexSizeErr("illegal offset value") - newText = self.__class__() - newText.data = self.data[offset:] - newText.ownerDocument = self.ownerDocument - next = self.nextSibling - if self.parentNode and self in self.parentNode.childNodes: - if next is None: - self.parentNode.appendChild(newText) - else: - self.parentNode.insertBefore(newText, next) - self.data = self.data[:offset] - return newText - - def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, "%s%s%s"%(indent, self.data, newl)) - - # DOM Level 3 (WD 9 April 2002) - - def _get_wholeText(self): - L = [self.data] - n = self.previousSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - L.insert(0, n.data) - n = n.previousSibling - else: - break - n = self.nextSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - L.append(n.data) - n = n.nextSibling - else: - break - return ''.join(L) - - def replaceWholeText(self, content): - # XXX This needs to be seriously changed if minidom ever - # supports EntityReference nodes. - parent = self.parentNode - n = self.previousSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - next = n.previousSibling - parent.removeChild(n) - n = next - else: - break - n = self.nextSibling - if not content: - parent.removeChild(self) - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - next = n.nextSibling - parent.removeChild(n) - n = next - else: - break - if content: - d = self.__dict__ - d['data'] = content - d['nodeValue'] = content - return self - else: - return None - - def _get_isWhitespaceInElementContent(self): - if self.data.strip(): - return False - elem = _get_containing_element(self) - if elem is None: - return False - info = self.ownerDocument._get_elem_info(elem) - if info is None: - return False - else: - return info.isElementContent() - -defproperty(Text, "isWhitespaceInElementContent", - doc="True iff this text node contains only whitespace" - " and is in element content.") -defproperty(Text, "wholeText", - doc="The text of all logically-adjacent text nodes.") - - -def _get_containing_element(node): - c = node.parentNode - while c is not None: - if c.nodeType == Node.ELEMENT_NODE: - return c - c = c.parentNode - return None - -def _get_containing_entref(node): - c = node.parentNode - while c is not None: - if c.nodeType == Node.ENTITY_REFERENCE_NODE: - return c - c = c.parentNode - return None - - -class Comment(Childless, CharacterData): - nodeType = Node.COMMENT_NODE - nodeName = "#comment" - - def __init__(self, data): - self.data = self.nodeValue = data - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) - - -class CDATASection(Text): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, Text is an old-style class - # __slots__ = () - - nodeType = Node.CDATA_SECTION_NODE - nodeName = "#cdata-section" - - def writexml(self, writer, indent="", addindent="", newl=""): - if self.data.find("]]>") >= 0: - raise ValueError("']]>' not allowed in a CDATA section") - writer.write("<![CDATA[%s]]>" % self.data) - - -class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic): - __slots__ = '_seq', - - def __init__(self, seq=()): - # seq should be a list or tuple - self._seq = seq - - def __len__(self): - return len(self._seq) - - def _get_length(self): - return len(self._seq) - - def getNamedItem(self, name): - for n in self._seq: - if n.nodeName == name: - return n - - def getNamedItemNS(self, namespaceURI, localName): - for n in self._seq: - if n.namespaceURI == namespaceURI and n.localName == localName: - return n - - def __getitem__(self, name_or_tuple): - if isinstance(name_or_tuple, _TupleType): - node = self.getNamedItemNS(*name_or_tuple) - else: - node = self.getNamedItem(name_or_tuple) - if node is None: - raise KeyError, name_or_tuple - return node - - def item(self, index): - if index < 0: - return None - try: - return self._seq[index] - except IndexError: - return None - - def removeNamedItem(self, name): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def removeNamedItemNS(self, namespaceURI, localName): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def setNamedItem(self, node): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def setNamedItemNS(self, node): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def __getstate__(self): - return [self._seq] - - def __setstate__(self, state): - self._seq = state[0] - -defproperty(ReadOnlySequentialNamedNodeMap, "length", - doc="Number of entries in the NamedNodeMap.") - - -class Identified: - """Mix-in class that supports the publicId and systemId attributes.""" - - # XXX this does not work, this is an old-style class - # __slots__ = 'publicId', 'systemId' - - def _identified_mixin_init(self, publicId, systemId): - self.publicId = publicId - self.systemId = systemId - - def _get_publicId(self): - return self.publicId - - def _get_systemId(self): - return self.systemId - -class DocumentType(Identified, Childless, Node): - nodeType = Node.DOCUMENT_TYPE_NODE - nodeValue = None - name = None - publicId = None - systemId = None - internalSubset = None - - def __init__(self, qualifiedName): - self.entities = ReadOnlySequentialNamedNodeMap() - self.notations = ReadOnlySequentialNamedNodeMap() - if qualifiedName: - prefix, localname = _nssplit(qualifiedName) - self.name = localname - self.nodeName = self.name - - def _get_internalSubset(self): - return self.internalSubset - - def cloneNode(self, deep): - if self.ownerDocument is None: - # it's ok - clone = DocumentType(None) - clone.name = self.name - clone.nodeName = self.name - operation = xml.dom.UserDataHandler.NODE_CLONED - if deep: - clone.entities._seq = [] - clone.notations._seq = [] - for n in self.notations._seq: - notation = Notation(n.nodeName, n.publicId, n.systemId) - clone.notations._seq.append(notation) - n._call_user_data_handler(operation, n, notation) - for e in self.entities._seq: - entity = Entity(e.nodeName, e.publicId, e.systemId, - e.notationName) - entity.actualEncoding = e.actualEncoding - entity.encoding = e.encoding - entity.version = e.version - clone.entities._seq.append(entity) - e._call_user_data_handler(operation, n, entity) - self._call_user_data_handler(operation, self, clone) - return clone - else: - return None - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("<!DOCTYPE ") - writer.write(self.name) - if self.publicId: - writer.write("%s PUBLIC '%s'%s '%s'" - % (newl, self.publicId, newl, self.systemId)) - elif self.systemId: - writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) - if self.internalSubset is not None: - writer.write(" [") - writer.write(self.internalSubset) - writer.write("]") - writer.write(">"+newl) - -class Entity(Identified, Node): - attributes = None - nodeType = Node.ENTITY_NODE - nodeValue = None - - actualEncoding = None - encoding = None - version = None - - def __init__(self, name, publicId, systemId, notation): - self.nodeName = name - self.notationName = notation - self.childNodes = NodeList() - self._identified_mixin_init(publicId, systemId) - - def _get_actualEncoding(self): - return self.actualEncoding - - def _get_encoding(self): - return self.encoding - - def _get_version(self): - return self.version - - def appendChild(self, newChild): - raise xml.dom.HierarchyRequestErr( - "cannot append children to an entity node") - - def insertBefore(self, newChild, refChild): - raise xml.dom.HierarchyRequestErr( - "cannot insert children below an entity node") - - def removeChild(self, oldChild): - raise xml.dom.HierarchyRequestErr( - "cannot remove children from an entity node") - - def replaceChild(self, newChild, oldChild): - raise xml.dom.HierarchyRequestErr( - "cannot replace children of an entity node") - -class Notation(Identified, Childless, Node): - nodeType = Node.NOTATION_NODE - nodeValue = None - - def __init__(self, name, publicId, systemId): - self.nodeName = name - self._identified_mixin_init(publicId, systemId) - - -class DOMImplementation(DOMImplementationLS): - _features = [("core", "1.0"), - ("core", "2.0"), - ("core", "3.0"), - ("core", None), - ("xml", "1.0"), - ("xml", "2.0"), - ("xml", "3.0"), - ("xml", None), - ("ls-load", "3.0"), - ("ls-load", None), - ] - - def hasFeature(self, feature, version): - if version == "": - version = None - return (feature.lower(), version) in self._features - - def createDocument(self, namespaceURI, qualifiedName, doctype): - if doctype and doctype.parentNode is not None: - raise xml.dom.WrongDocumentErr( - "doctype object owned by another DOM tree") - doc = self._create_document() - - add_root_element = not (namespaceURI is None - and qualifiedName is None - and doctype is None) - - if not qualifiedName and add_root_element: - # The spec is unclear what to raise here; SyntaxErr - # would be the other obvious candidate. Since Xerces raises - # InvalidCharacterErr, and since SyntaxErr is not listed - # for createDocument, that seems to be the better choice. - # XXX: need to check for illegal characters here and in - # createElement. - - # DOM Level III clears this up when talking about the return value - # of this function. If namespaceURI, qName and DocType are - # Null the document is returned without a document element - # Otherwise if doctype or namespaceURI are not None - # Then we go back to the above problem - raise xml.dom.InvalidCharacterErr("Element with no name") - - if add_root_element: - prefix, localname = _nssplit(qualifiedName) - if prefix == "xml" \ - and namespaceURI != "http://www.w3.org/XML/1998/namespace": - raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") - if prefix and not namespaceURI: - raise xml.dom.NamespaceErr( - "illegal use of prefix without namespaces") - element = doc.createElementNS(namespaceURI, qualifiedName) - if doctype: - doc.appendChild(doctype) - doc.appendChild(element) - - if doctype: - doctype.parentNode = doctype.ownerDocument = doc - - doc.doctype = doctype - doc.implementation = self - return doc - - def createDocumentType(self, qualifiedName, publicId, systemId): - doctype = DocumentType(qualifiedName) - doctype.publicId = publicId - doctype.systemId = systemId - return doctype - - # DOM Level 3 (WD 9 April 2002) - - def getInterface(self, feature): - if self.hasFeature(feature, None): - return self - else: - return None - - # internal - def _create_document(self): - return Document() - -class ElementInfo(NewStyle): - """Object that represents content-model information for an element. - - This implementation is not expected to be used in practice; DOM - builders should provide implementations which do the right thing - using information available to it. - - """ - - __slots__ = 'tagName', - - def __init__(self, name): - self.tagName = name - - def getAttributeType(self, aname): - return _no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return _no_type - - def isElementContent(self): - return False - - def isEmpty(self): - """Returns true iff this element is declared to have an EMPTY - content model.""" - return False - - def isId(self, aname): - """Returns true iff the named attribte is a DTD-style ID.""" - return False - - def isIdNS(self, namespaceURI, localName): - """Returns true iff the identified attribute is a DTD-style ID.""" - return False - - def __getstate__(self): - return self.tagName - - def __setstate__(self, state): - self.tagName = state - -def _clear_id_cache(node): - if node.nodeType == Node.DOCUMENT_NODE: - node._id_cache.clear() - node._id_search_stack = None - elif _in_document(node): - node.ownerDocument._id_cache.clear() - node.ownerDocument._id_search_stack= None - -class Document(Node, DocumentLS): - _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) - - nodeType = Node.DOCUMENT_NODE - nodeName = "#document" - nodeValue = None - attributes = None - doctype = None - parentNode = None - previousSibling = nextSibling = None - - implementation = DOMImplementation() - - # Document attributes from Level 3 (WD 9 April 2002) - - actualEncoding = None - encoding = None - standalone = None - version = None - strictErrorChecking = False - errorHandler = None - documentURI = None - - _magic_id_count = 0 - - def __init__(self): - self.childNodes = NodeList() - # mapping of (namespaceURI, localName) -> ElementInfo - # and tagName -> ElementInfo - self._elem_info = {} - self._id_cache = {} - self._id_search_stack = None - - def _get_elem_info(self, element): - if element.namespaceURI: - key = element.namespaceURI, element.localName - else: - key = element.tagName - return self._elem_info.get(key) - - def _get_actualEncoding(self): - return self.actualEncoding - - def _get_doctype(self): - return self.doctype - - def _get_documentURI(self): - return self.documentURI - - def _get_encoding(self): - return self.encoding - - def _get_errorHandler(self): - return self.errorHandler - - def _get_standalone(self): - return self.standalone - - def _get_strictErrorChecking(self): - return self.strictErrorChecking - - def _get_version(self): - return self.version - - def appendChild(self, node): - if node.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - if node.parentNode is not None: - # This needs to be done before the next test since this - # may *be* the document element, in which case it should - # end up re-ordered to the end. - node.parentNode.removeChild(node) - - if node.nodeType == Node.ELEMENT_NODE \ - and self._get_documentElement(): - raise xml.dom.HierarchyRequestErr( - "two document elements disallowed") - return Node.appendChild(self, node) - - def removeChild(self, oldChild): - try: - self.childNodes.remove(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - oldChild.nextSibling = oldChild.previousSibling = None - oldChild.parentNode = None - if self.documentElement is oldChild: - self.documentElement = None - - return oldChild - - def _get_documentElement(self): - for node in self.childNodes: - if node.nodeType == Node.ELEMENT_NODE: - return node - - def unlink(self): - if self.doctype is not None: - self.doctype.unlink() - self.doctype = None - Node.unlink(self) - - def cloneNode(self, deep): - if not deep: - return None - clone = self.implementation.createDocument(None, None, None) - clone.encoding = self.encoding - clone.standalone = self.standalone - clone.version = self.version - for n in self.childNodes: - childclone = _clone_node(n, deep, clone) - assert childclone.ownerDocument.isSameNode(clone) - clone.childNodes.append(childclone) - if childclone.nodeType == Node.DOCUMENT_NODE: - assert clone.documentElement is None - elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: - assert clone.doctype is None - clone.doctype = childclone - childclone.parentNode = clone - self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, - self, clone) - return clone - - def createDocumentFragment(self): - d = DocumentFragment() - d.ownerDocument = self - return d - - def createElement(self, tagName): - e = Element(tagName) - e.ownerDocument = self - return e - - def createTextNode(self, data): - if not isinstance(data, StringTypes): - raise TypeError, "node contents must be a string" - t = Text() - t.data = data - t.ownerDocument = self - return t - - def createCDATASection(self, data): - if not isinstance(data, StringTypes): - raise TypeError, "node contents must be a string" - c = CDATASection() - c.data = data - c.ownerDocument = self - return c - - def createComment(self, data): - c = Comment(data) - c.ownerDocument = self - return c - - def createProcessingInstruction(self, target, data): - p = ProcessingInstruction(target, data) - p.ownerDocument = self - return p - - def createAttribute(self, qName): - a = Attr(qName) - a.ownerDocument = self - a.value = "" - return a - - def createElementNS(self, namespaceURI, qualifiedName): - prefix, localName = _nssplit(qualifiedName) - e = Element(qualifiedName, namespaceURI, prefix) - e.ownerDocument = self - return e - - def createAttributeNS(self, namespaceURI, qualifiedName): - prefix, localName = _nssplit(qualifiedName) - a = Attr(qualifiedName, namespaceURI, localName, prefix) - a.ownerDocument = self - a.value = "" - return a - - # A couple of implementation-specific helpers to create node types - # not supported by the W3C DOM specs: - - def _create_entity(self, name, publicId, systemId, notationName): - e = Entity(name, publicId, systemId, notationName) - e.ownerDocument = self - return e - - def _create_notation(self, name, publicId, systemId): - n = Notation(name, publicId, systemId) - n.ownerDocument = self - return n - - def getElementById(self, id): - if self._id_cache.has_key(id): - return self._id_cache[id] - if not (self._elem_info or self._magic_id_count): - return None - - stack = self._id_search_stack - if stack is None: - # we never searched before, or the cache has been cleared - stack = [self.documentElement] - self._id_search_stack = stack - elif not stack: - # Previous search was completed and cache is still valid; - # no matching node. - return None - - result = None - while stack: - node = stack.pop() - # add child elements to stack for continued searching - stack.extend([child for child in node.childNodes - if child.nodeType in _nodeTypes_with_children]) - # check this node - info = self._get_elem_info(node) - if info: - # We have to process all ID attributes before - # returning in order to get all the attributes set to - # be IDs using Element.setIdAttribute*(). - for attr in node.attributes.values(): - if attr.namespaceURI: - if info.isIdNS(attr.namespaceURI, attr.localName): - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif not node._magic_id_nodes: - break - elif info.isId(attr.name): - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif not node._magic_id_nodes: - break - elif attr._is_id: - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif node._magic_id_nodes == 1: - break - elif node._magic_id_nodes: - for attr in node.attributes.values(): - if attr._is_id: - self._id_cache[attr.value] = node - if attr.value == id: - result = node - if result is not None: - break - return result - - def getElementsByTagName(self, name): - return _get_elements_by_tagName_helper(self, name, NodeList()) - - def getElementsByTagNameNS(self, namespaceURI, localName): - return _get_elements_by_tagName_ns_helper( - self, namespaceURI, localName, NodeList()) - - def isSupported(self, feature, version): - return self.implementation.hasFeature(feature, version) - - def importNode(self, node, deep): - if node.nodeType == Node.DOCUMENT_NODE: - raise xml.dom.NotSupportedErr("cannot import document nodes") - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - raise xml.dom.NotSupportedErr("cannot import document type nodes") - return _clone_node(node, deep, self) - - def writexml(self, writer, indent="", addindent="", newl="", - encoding = None): - if encoding is None: - writer.write('<?xml version="1.0" ?>'+newl) - else: - writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) - for node in self.childNodes: - node.writexml(writer, indent, addindent, newl) - - # DOM Level 3 (WD 9 April 2002) - - def renameNode(self, n, namespaceURI, name): - if n.ownerDocument is not self: - raise xml.dom.WrongDocumentErr( - "cannot rename nodes from other documents;\n" - "expected %s,\nfound %s" % (self, n.ownerDocument)) - if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): - raise xml.dom.NotSupportedErr( - "renameNode() only applies to element and attribute nodes") - if namespaceURI != EMPTY_NAMESPACE: - if ':' in name: - prefix, localName = name.split(':', 1) - if ( prefix == "xmlns" - and namespaceURI != xml.dom.XMLNS_NAMESPACE): - raise xml.dom.NamespaceErr( - "illegal use of 'xmlns' prefix") - else: - if ( name == "xmlns" - and namespaceURI != xml.dom.XMLNS_NAMESPACE - and n.nodeType == Node.ATTRIBUTE_NODE): - raise xml.dom.NamespaceErr( - "illegal use of the 'xmlns' attribute") - prefix = None - localName = name - else: - prefix = None - localName = None - if n.nodeType == Node.ATTRIBUTE_NODE: - element = n.ownerElement - if element is not None: - is_id = n._is_id - element.removeAttributeNode(n) - else: - element = None - # avoid __setattr__ - d = n.__dict__ - d['prefix'] = prefix - d['localName'] = localName - d['namespaceURI'] = namespaceURI - d['nodeName'] = name - if n.nodeType == Node.ELEMENT_NODE: - d['tagName'] = name - else: - # attribute node - d['name'] = name - if element is not None: - element.setAttributeNode(n) - if is_id: - element.setIdAttributeNode(n) - # It's not clear from a semantic perspective whether we should - # call the user data handlers for the NODE_RENAMED event since - # we're re-using the existing node. The draft spec has been - # interpreted as meaning "no, don't call the handler unless a - # new node is created." - return n - -defproperty(Document, "documentElement", - doc="Top-level element of this document.") - - -def _clone_node(node, deep, newOwnerDocument): - """ - Clone a node and give it the new owner document. - Called by Node.cloneNode and Document.importNode - """ - if node.ownerDocument.isSameNode(newOwnerDocument): - operation = xml.dom.UserDataHandler.NODE_CLONED - else: - operation = xml.dom.UserDataHandler.NODE_IMPORTED - if node.nodeType == Node.ELEMENT_NODE: - clone = newOwnerDocument.createElementNS(node.namespaceURI, - node.nodeName) - for attr in node.attributes.values(): - clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) - a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) - a.specified = attr.specified - - if deep: - for child in node.childNodes: - c = _clone_node(child, deep, newOwnerDocument) - clone.appendChild(c) - - elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: - clone = newOwnerDocument.createDocumentFragment() - if deep: - for child in node.childNodes: - c = _clone_node(child, deep, newOwnerDocument) - clone.appendChild(c) - - elif node.nodeType == Node.TEXT_NODE: - clone = newOwnerDocument.createTextNode(node.data) - elif node.nodeType == Node.CDATA_SECTION_NODE: - clone = newOwnerDocument.createCDATASection(node.data) - elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: - clone = newOwnerDocument.createProcessingInstruction(node.target, - node.data) - elif node.nodeType == Node.COMMENT_NODE: - clone = newOwnerDocument.createComment(node.data) - elif node.nodeType == Node.ATTRIBUTE_NODE: - clone = newOwnerDocument.createAttributeNS(node.namespaceURI, - node.nodeName) - clone.specified = True - clone.value = node.value - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - assert node.ownerDocument is not newOwnerDocument - operation = xml.dom.UserDataHandler.NODE_IMPORTED - clone = newOwnerDocument.implementation.createDocumentType( - node.name, node.publicId, node.systemId) - clone.ownerDocument = newOwnerDocument - if deep: - clone.entities._seq = [] - clone.notations._seq = [] - for n in node.notations._seq: - notation = Notation(n.nodeName, n.publicId, n.systemId) - notation.ownerDocument = newOwnerDocument - clone.notations._seq.append(notation) - if hasattr(n, '_call_user_data_handler'): - n._call_user_data_handler(operation, n, notation) - for e in node.entities._seq: - entity = Entity(e.nodeName, e.publicId, e.systemId, - e.notationName) - entity.actualEncoding = e.actualEncoding - entity.encoding = e.encoding - entity.version = e.version - entity.ownerDocument = newOwnerDocument - clone.entities._seq.append(entity) - if hasattr(e, '_call_user_data_handler'): - e._call_user_data_handler(operation, n, entity) - else: - # Note the cloning of Document and DocumentType nodes is - # implemenetation specific. minidom handles those cases - # directly in the cloneNode() methods. - raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) - - # Check for _call_user_data_handler() since this could conceivably - # used with other DOM implementations (one of the FourThought - # DOMs, perhaps?). - if hasattr(node, '_call_user_data_handler'): - node._call_user_data_handler(operation, node, clone) - return clone - - -def _nssplit(qualifiedName): - fields = qualifiedName.split(':', 1) - if len(fields) == 2: - return fields - else: - return (None, fields[0]) - - -def _get_StringIO(): - # we can't use cStringIO since it doesn't support Unicode strings - from StringIO import StringIO - return StringIO() - -def _do_pulldom_parse(func, args, kwargs): - events = func(*args, **kwargs) - toktype, rootNode = events.getEvent() - events.expandNode(rootNode) - events.clear() - return rootNode - -def parse(file, parser=None, bufsize=None): - """Parse a file into a DOM by filename or file object.""" - if parser is None and not bufsize: - from xml.dom import expatbuilder - return expatbuilder.parse(file) - else: - from xml.dom import pulldom - return _do_pulldom_parse(pulldom.parse, (file,), - {'parser': parser, 'bufsize': bufsize}) - -def parseString(string, parser=None): - """Parse a file into a DOM from a string.""" - if parser is None: - from xml.dom import expatbuilder - return expatbuilder.parseString(string) - else: - from xml.dom import pulldom - return _do_pulldom_parse(pulldom.parseString, (string,), - {'parser': parser}) - -def getDOMImplementation(features=None): - if features: - if isinstance(features, StringTypes): - features = domreg._parse_feature_string(features) - for f, v in features: - if not Document.implementation.hasFeature(f, v): - return None - return Document.implementation diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py deleted file mode 100644 index 18f49b5..0000000 --- a/Lib/xml/dom/pulldom.py +++ /dev/null @@ -1,351 +0,0 @@ -import xml.sax -import xml.sax.handler -import types - -try: - _StringTypes = [types.StringType, types.UnicodeType] -except AttributeError: - _StringTypes = [types.StringType] - -START_ELEMENT = "START_ELEMENT" -END_ELEMENT = "END_ELEMENT" -COMMENT = "COMMENT" -START_DOCUMENT = "START_DOCUMENT" -END_DOCUMENT = "END_DOCUMENT" -PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" -IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" -CHARACTERS = "CHARACTERS" - -class PullDOM(xml.sax.ContentHandler): - _locator = None - document = None - - def __init__(self, documentFactory=None): - from xml.dom import XML_NAMESPACE - self.documentFactory = documentFactory - self.firstEvent = [None, None] - self.lastEvent = self.firstEvent - self.elementStack = [] - self.push = self.elementStack.append - try: - self.pop = self.elementStack.pop - except AttributeError: - # use class' pop instead - pass - self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self.pending_events = [] - - def pop(self): - result = self.elementStack[-1] - del self.elementStack[-1] - return result - - def setDocumentLocator(self, locator): - self._locator = locator - - def startPrefixMapping(self, prefix, uri): - if not hasattr(self, '_xmlns_attrs'): - self._xmlns_attrs = [] - self._xmlns_attrs.append((prefix or 'xmlns', uri)) - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix or None - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts.pop() - - def startElementNS(self, name, tagName , attrs): - # Retrieve xml namespace declaration attributes. - xmlns_uri = 'http://www.w3.org/2000/xmlns/' - xmlns_attrs = getattr(self, '_xmlns_attrs', None) - if xmlns_attrs is not None: - for aname, value in xmlns_attrs: - attrs._attrs[(xmlns_uri, aname)] = value - self._xmlns_attrs = [] - uri, localname = name - if uri: - # When using namespaces, the reader may or may not - # provide us with the original name. If not, create - # *a* valid tagName from the current context. - if tagName is None: - prefix = self._current_context[uri] - if prefix: - tagName = prefix + ":" + localname - else: - tagName = localname - if self.document: - node = self.document.createElementNS(uri, tagName) - else: - node = self.buildDocument(uri, tagName) - else: - # When the tagname is not prefixed, it just appears as - # localname - if self.document: - node = self.document.createElement(localname) - else: - node = self.buildDocument(None, localname) - - for aname,value in attrs.items(): - a_uri, a_localname = aname - if a_uri == xmlns_uri: - if a_localname == 'xmlns': - qname = a_localname - else: - qname = 'xmlns:' + a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - elif a_uri: - prefix = self._current_context[a_uri] - if prefix: - qname = prefix + ":" + a_localname - else: - qname = a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - else: - attr = self.document.createAttribute(a_localname) - node.setAttributeNode(attr) - attr.value = value - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElementNS(self, name, tagName): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def startElement(self, name, attrs): - if self.document: - node = self.document.createElement(name) - else: - node = self.buildDocument(None, name) - - for aname,value in attrs.items(): - attr = self.document.createAttribute(aname) - attr.value = value - node.setAttributeNode(attr) - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElement(self, name): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def comment(self, s): - if self.document: - node = self.document.createComment(s) - self.lastEvent[1] = [(COMMENT, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(COMMENT, s), None] - self.pending_events.append(event) - - def processingInstruction(self, target, data): - if self.document: - node = self.document.createProcessingInstruction(target, data) - self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(PROCESSING_INSTRUCTION, target, data), None] - self.pending_events.append(event) - - def ignorableWhitespace(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] - self.lastEvent = self.lastEvent[1] - - def characters(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(CHARACTERS, node), None] - self.lastEvent = self.lastEvent[1] - - def startDocument(self): - if self.documentFactory is None: - import xml.dom.minidom - self.documentFactory = xml.dom.minidom.Document.implementation - - def buildDocument(self, uri, tagname): - # Can't do that in startDocument, since we need the tagname - # XXX: obtain DocumentType - node = self.documentFactory.createDocument(uri, tagname, None) - self.document = node - self.lastEvent[1] = [(START_DOCUMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - # Put everything we have seen so far into the document - for e in self.pending_events: - if e[0][0] == PROCESSING_INSTRUCTION: - _,target,data = e[0] - n = self.document.createProcessingInstruction(target, data) - e[0] = (PROCESSING_INSTRUCTION, n) - elif e[0][0] == COMMENT: - n = self.document.createComment(e[0][1]) - e[0] = (COMMENT, n) - else: - raise AssertionError("Unknown pending event ",e[0][0]) - self.lastEvent[1] = e - self.lastEvent = e - self.pending_events = None - return node.firstChild - - def endDocument(self): - self.lastEvent[1] = [(END_DOCUMENT, self.document), None] - self.pop() - - def clear(self): - "clear(): Explicitly release parsing structures" - self.document = None - -class ErrorHandler: - def warning(self, exception): - print exception - def error(self, exception): - raise exception - def fatalError(self, exception): - raise exception - -class DOMEventStream: - def __init__(self, stream, parser, bufsize): - self.stream = stream - self.parser = parser - self.bufsize = bufsize - if not hasattr(self.parser, 'feed'): - self.getEvent = self._slurp - self.reset() - - def reset(self): - self.pulldom = PullDOM() - # This content handler relies on namespace support - self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) - self.parser.setContentHandler(self.pulldom) - - def __getitem__(self, pos): - rc = self.getEvent() - if rc: - return rc - raise IndexError - - def next(self): - rc = self.getEvent() - if rc: - return rc - raise StopIteration - - def __iter__(self): - return self - - def expandNode(self, node): - event = self.getEvent() - parents = [node] - while event: - token, cur_node = event - if cur_node is node: - return - if token != END_ELEMENT: - parents[-1].appendChild(cur_node) - if token == START_ELEMENT: - parents.append(cur_node) - elif token == END_ELEMENT: - del parents[-1] - event = self.getEvent() - - def getEvent(self): - # use IncrementalParser interface, so we get the desired - # pull effect - if not self.pulldom.firstEvent[1]: - self.pulldom.lastEvent = self.pulldom.firstEvent - while not self.pulldom.firstEvent[1]: - buf = self.stream.read(self.bufsize) - if not buf: - self.parser.close() - return None - self.parser.feed(buf) - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def _slurp(self): - """ Fallback replacement for getEvent() using the - standard SAX2 interface, which means we slurp the - SAX events into memory (no performance gain, but - we are compatible to all SAX parsers). - """ - self.parser.parse(self.stream) - self.getEvent = self._emit - return self._emit() - - def _emit(self): - """ Fallback replacement for getEvent() that emits - the events that _slurp() read previously. - """ - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def clear(self): - """clear(): Explicitly release parsing objects""" - self.pulldom.clear() - del self.pulldom - self.parser = None - self.stream = None - -class SAX2DOM(PullDOM): - - def startElementNS(self, name, tagName , attrs): - PullDOM.startElementNS(self, name, tagName, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def startElement(self, name, attrs): - PullDOM.startElement(self, name, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def processingInstruction(self, target, data): - PullDOM.processingInstruction(self, target, data) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def ignorableWhitespace(self, chars): - PullDOM.ignorableWhitespace(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def characters(self, chars): - PullDOM.characters(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - -default_bufsize = (2 ** 14) - 20 - -def parse(stream_or_string, parser=None, bufsize=None): - if bufsize is None: - bufsize = default_bufsize - if type(stream_or_string) in _StringTypes: - stream = open(stream_or_string) - else: - stream = stream_or_string - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(stream, parser, bufsize) - -def parseString(string, parser=None): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - - bufsize = len(string) - buf = StringIO(string) - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(buf, parser, bufsize) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py deleted file mode 100644 index 5c04412..0000000 --- a/Lib/xml/dom/xmlbuilder.py +++ /dev/null @@ -1,388 +0,0 @@ -"""Implementation of the DOM Level 3 'LS-Load' feature.""" - -import copy -import xml.dom - -from xml.dom.minicompat import * - -from xml.dom.NodeFilter import NodeFilter - - -__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] - - -class Options: - """Features object that has variables set for each DOMBuilder feature. - - The DOMBuilder class uses an instance of this class to pass settings to - the ExpatBuilder class. - """ - - # Note that the DOMBuilder class in LoadSave constrains which of these - # values can be set using the DOM Level 3 LoadSave feature. - - namespaces = 1 - namespace_declarations = True - validation = False - external_parameter_entities = True - external_general_entities = True - external_dtd_subset = True - validate_if_schema = False - validate = False - datatype_normalization = False - create_entity_ref_nodes = True - entities = True - whitespace_in_element_content = True - cdata_sections = True - comments = True - charset_overrides_xml_encoding = True - infoset = False - supported_mediatypes_only = False - - errorHandler = None - filter = None - - -class DOMBuilder: - entityResolver = None - errorHandler = None - filter = None - - ACTION_REPLACE = 1 - ACTION_APPEND_AS_CHILDREN = 2 - ACTION_INSERT_AFTER = 3 - ACTION_INSERT_BEFORE = 4 - - _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, - ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) - - def __init__(self): - self._options = Options() - - def _get_entityResolver(self): - return self.entityResolver - def _set_entityResolver(self, entityResolver): - self.entityResolver = entityResolver - - def _get_errorHandler(self): - return self.errorHandler - def _set_errorHandler(self, errorHandler): - self.errorHandler = errorHandler - - def _get_filter(self): - return self.filter - def _set_filter(self, filter): - self.filter = filter - - def setFeature(self, name, state): - if self.supportsFeature(name): - state = state and 1 or 0 - try: - settings = self._settings[(_name_xform(name), state)] - except KeyError: - raise xml.dom.NotSupportedErr( - "unsupported feature: %r" % (name,)) - else: - for name, value in settings: - setattr(self._options, name, value) - else: - raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) - - def supportsFeature(self, name): - return hasattr(self._options, _name_xform(name)) - - def canSetFeature(self, name, state): - key = (_name_xform(name), state and 1 or 0) - return self._settings.has_key(key) - - # This dictionary maps from (feature,value) to a list of - # (option,value) pairs that should be set on the Options object. - # If a (feature,value) setting is not in this dictionary, it is - # not supported by the DOMBuilder. - # - _settings = { - ("namespace_declarations", 0): [ - ("namespace_declarations", 0)], - ("namespace_declarations", 1): [ - ("namespace_declarations", 1)], - ("validation", 0): [ - ("validation", 0)], - ("external_general_entities", 0): [ - ("external_general_entities", 0)], - ("external_general_entities", 1): [ - ("external_general_entities", 1)], - ("external_parameter_entities", 0): [ - ("external_parameter_entities", 0)], - ("external_parameter_entities", 1): [ - ("external_parameter_entities", 1)], - ("validate_if_schema", 0): [ - ("validate_if_schema", 0)], - ("create_entity_ref_nodes", 0): [ - ("create_entity_ref_nodes", 0)], - ("create_entity_ref_nodes", 1): [ - ("create_entity_ref_nodes", 1)], - ("entities", 0): [ - ("create_entity_ref_nodes", 0), - ("entities", 0)], - ("entities", 1): [ - ("entities", 1)], - ("whitespace_in_element_content", 0): [ - ("whitespace_in_element_content", 0)], - ("whitespace_in_element_content", 1): [ - ("whitespace_in_element_content", 1)], - ("cdata_sections", 0): [ - ("cdata_sections", 0)], - ("cdata_sections", 1): [ - ("cdata_sections", 1)], - ("comments", 0): [ - ("comments", 0)], - ("comments", 1): [ - ("comments", 1)], - ("charset_overrides_xml_encoding", 0): [ - ("charset_overrides_xml_encoding", 0)], - ("charset_overrides_xml_encoding", 1): [ - ("charset_overrides_xml_encoding", 1)], - ("infoset", 0): [], - ("infoset", 1): [ - ("namespace_declarations", 0), - ("validate_if_schema", 0), - ("create_entity_ref_nodes", 0), - ("entities", 0), - ("cdata_sections", 0), - ("datatype_normalization", 1), - ("whitespace_in_element_content", 1), - ("comments", 1), - ("charset_overrides_xml_encoding", 1)], - ("supported_mediatypes_only", 0): [ - ("supported_mediatypes_only", 0)], - ("namespaces", 0): [ - ("namespaces", 0)], - ("namespaces", 1): [ - ("namespaces", 1)], - } - - def getFeature(self, name): - xname = _name_xform(name) - try: - return getattr(self._options, xname) - except AttributeError: - if name == "infoset": - options = self._options - return (options.datatype_normalization - and options.whitespace_in_element_content - and options.comments - and options.charset_overrides_xml_encoding - and not (options.namespace_declarations - or options.validate_if_schema - or options.create_entity_ref_nodes - or options.entities - or options.cdata_sections)) - raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) - - def parseURI(self, uri): - if self.entityResolver: - input = self.entityResolver.resolveEntity(None, uri) - else: - input = DOMEntityResolver().resolveEntity(None, uri) - return self.parse(input) - - def parse(self, input): - options = copy.copy(self._options) - options.filter = self.filter - options.errorHandler = self.errorHandler - fp = input.byteStream - if fp is None and options.systemId: - import urllib2 - fp = urllib2.urlopen(input.systemId) - return self._parse_bytestream(fp, options) - - def parseWithContext(self, input, cnode, action): - if action not in self._legal_actions: - raise ValueError("not a legal action") - raise NotImplementedError("Haven't written this yet...") - - def _parse_bytestream(self, stream, options): - import xml.dom.expatbuilder - builder = xml.dom.expatbuilder.makeBuilder(options) - return builder.parseFile(stream) - - -def _name_xform(name): - return name.lower().replace('-', '_') - - -class DOMEntityResolver(NewStyle): - __slots__ = '_opener', - - def resolveEntity(self, publicId, systemId): - assert systemId is not None - source = DOMInputSource() - source.publicId = publicId - source.systemId = systemId - source.byteStream = self._get_opener().open(systemId) - - # determine the encoding if the transport provided it - source.encoding = self._guess_media_encoding(source) - - # determine the base URI is we can - import posixpath, urlparse - parts = urlparse.urlparse(systemId) - scheme, netloc, path, params, query, fragment = parts - # XXX should we check the scheme here as well? - if path and not path.endswith("/"): - path = posixpath.dirname(path) + "/" - parts = scheme, netloc, path, params, query, fragment - source.baseURI = urlparse.urlunparse(parts) - - return source - - def _get_opener(self): - try: - return self._opener - except AttributeError: - self._opener = self._create_opener() - return self._opener - - def _create_opener(self): - import urllib2 - return urllib2.build_opener() - - def _guess_media_encoding(self, source): - info = source.byteStream.info() - if info.has_key("Content-Type"): - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() - - -class DOMInputSource(NewStyle): - __slots__ = ('byteStream', 'characterStream', 'stringData', - 'encoding', 'publicId', 'systemId', 'baseURI') - - def __init__(self): - self.byteStream = None - self.characterStream = None - self.stringData = None - self.encoding = None - self.publicId = None - self.systemId = None - self.baseURI = None - - def _get_byteStream(self): - return self.byteStream - def _set_byteStream(self, byteStream): - self.byteStream = byteStream - - def _get_characterStream(self): - return self.characterStream - def _set_characterStream(self, characterStream): - self.characterStream = characterStream - - def _get_stringData(self): - return self.stringData - def _set_stringData(self, data): - self.stringData = data - - def _get_encoding(self): - return self.encoding - def _set_encoding(self, encoding): - self.encoding = encoding - - def _get_publicId(self): - return self.publicId - def _set_publicId(self, publicId): - self.publicId = publicId - - def _get_systemId(self): - return self.systemId - def _set_systemId(self, systemId): - self.systemId = systemId - - def _get_baseURI(self): - return self.baseURI - def _set_baseURI(self, uri): - self.baseURI = uri - - -class DOMBuilderFilter: - """Element filter which can be used to tailor construction of - a DOM instance. - """ - - # There's really no need for this class; concrete implementations - # should just implement the endElement() and startElement() - # methods as appropriate. Using this makes it easy to only - # implement one of them. - - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - FILTER_INTERRUPT = 4 - - whatToShow = NodeFilter.SHOW_ALL - - def _get_whatToShow(self): - return self.whatToShow - - def acceptNode(self, element): - return self.FILTER_ACCEPT - - def startContainer(self, element): - return self.FILTER_ACCEPT - -del NodeFilter - - -class DocumentLS: - """Mixin to create documents that conform to the load/save spec.""" - - async = False - - def _get_async(self): - return False - def _set_async(self, async): - if async: - raise xml.dom.NotSupportedErr( - "asynchronous document loading is not supported") - - def abort(self): - # What does it mean to "clear" a document? Does the - # documentElement disappear? - raise NotImplementedError( - "haven't figured out what this means yet") - - def load(self, uri): - raise NotImplementedError("haven't written this yet") - - def loadXML(self, source): - raise NotImplementedError("haven't written this yet") - - def saveXML(self, snode): - if snode is None: - snode = self - elif snode.ownerDocument is not self: - raise xml.dom.WrongDocumentErr() - return snode.toxml() - - -class DOMImplementationLS: - MODE_SYNCHRONOUS = 1 - MODE_ASYNCHRONOUS = 2 - - def createDOMBuilder(self, mode, schemaType): - if schemaType is not None: - raise xml.dom.NotSupportedErr( - "schemaType not yet supported") - if mode == self.MODE_SYNCHRONOUS: - return DOMBuilder() - if mode == self.MODE_ASYNCHRONOUS: - raise xml.dom.NotSupportedErr( - "asynchronous builders are not supported") - raise ValueError("unknown value for mode") - - def createDOMWriter(self): - raise NotImplementedError( - "the writer interface hasn't been written yet!") - - def createDOMInputSource(self): - return DOMInputSource() |