diff options
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/dom/__init__.py | 1 | ||||
-rw-r--r-- | Lib/xml/dom/domreg.py | 2 | ||||
-rw-r--r-- | Lib/xml/dom/expatbuilder.py | 42 | ||||
-rw-r--r-- | Lib/xml/dom/minidom.py | 293 | ||||
-rw-r--r-- | Lib/xml/dom/pulldom.py | 6 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 380 | ||||
-rw-r--r-- | Lib/xml/etree/cElementTree.py | 4 | ||||
-rw-r--r-- | Lib/xml/parsers/expat.py | 2 |
8 files changed, 413 insertions, 317 deletions
diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py index 4401bdf..97cf9a6 100644 --- a/Lib/xml/dom/__init__.py +++ b/Lib/xml/dom/__init__.py @@ -17,6 +17,7 @@ pulldom -- DOM builder supporting on-demand tree-building for selected class Node: """Class giving the NodeType constants.""" + __slots__ = () # DOM implementations may use this as a base class for their own # Node implementations. If they don't, the constants defined here diff --git a/Lib/xml/dom/domreg.py b/Lib/xml/dom/domreg.py index cb35bb0..8c3d901 100644 --- a/Lib/xml/dom/domreg.py +++ b/Lib/xml/dom/domreg.py @@ -2,8 +2,6 @@ directly. Instead, the functions getDOMImplementation and registerDOMImplementation should be imported from xml.dom.""" -from xml.dom.minicompat import * # isinstance, StringTypes - # This is a list of well-known implementations. Well-known names # should be published by posting to xml-sig@python.org, and are # subsequently recorded in this file. diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py index a98fe03..f074ab9 100644 --- a/Lib/xml/dom/expatbuilder.py +++ b/Lib/xml/dom/expatbuilder.py @@ -33,8 +33,6 @@ from xml.parsers import expat from xml.dom.minidom import _append_child, _set_attribute_node from xml.dom.NodeFilter import NodeFilter -from xml.dom.minicompat import * - TEXT_NODE = Node.TEXT_NODE CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE DOCUMENT_NODE = Node.DOCUMENT_NODE @@ -283,27 +281,23 @@ class ExpatBuilder: elif childNodes and childNodes[-1].nodeType == TEXT_NODE: node = childNodes[-1] value = node.data + data - d = node.__dict__ - d['data'] = d['nodeValue'] = value + node.data = value return else: node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = data - d['ownerDocument'] = self.document + node.data = data + node.ownerDocument = self.document _append_child(self.curNode, node) def character_data_handler(self, data): childNodes = self.curNode.childNodes if childNodes and childNodes[-1].nodeType == TEXT_NODE: node = childNodes[-1] - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data + node.data = node.data + data return node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - d['ownerDocument'] = self.document + node.data = node.data + data + node.ownerDocument = self.document _append_child(self.curNode, node) def entity_decl_handler(self, entityName, is_parameter_entity, value, @@ -363,11 +357,8 @@ class ExpatBuilder: a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, None, EMPTY_PREFIX) value = attributes[i+1] - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['value'] = d['nodeValue'] = value - d['ownerDocument'] = self.document + a.value = value + a.ownerDocument = self.document _set_attribute_node(node, a) if node is not self.document.documentElement: @@ -761,15 +752,13 @@ class Namespaces: else: a = minidom.Attr("xmlns", XMLNS_NAMESPACE, "xmlns", EMPTY_PREFIX) - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = uri - d = a.__dict__ - d['value'] = d['nodeValue'] = uri - d['ownerDocument'] = self.document + a.value = uri + a.ownerDocument = self.document _set_attribute_node(node, a) del self._ns_ordered_prefixes[:] if attributes: + node._ensure_attributes() _attrs = node._attrs _attrsNS = node._attrsNS for i in range(0, len(attributes), 2): @@ -785,12 +774,9 @@ class Namespaces: aname, EMPTY_PREFIX) _attrs[aname] = a _attrsNS[(EMPTY_NAMESPACE, aname)] = a - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['ownerDocument'] = self.document - d['value'] = d['nodeValue'] = value - d['ownerElement'] = node + a.ownerDocument = self.document + a.value = value + a.ownerElement = node if __debug__: # This only adds some asserts to the original diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index f23ad05..28e5030 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -14,7 +14,6 @@ Todo: * SAX 2 namespaces """ -import codecs import io import xml.dom @@ -47,25 +46,25 @@ class Node(xml.dom.Node): return self.toprettyxml("", "", encoding) def toprettyxml(self, indent="\t", newl="\n", encoding=None): - # indent = the indentation string to prepend, per level - # newl = the newline string to append - use_encoding = "utf-8" if encoding is None else encoding - writer = codecs.getwriter(use_encoding)(io.BytesIO()) + if encoding is None: + writer = io.StringIO() + else: + writer = io.TextIOWrapper(io.BytesIO(), + encoding=encoding, + errors="xmlcharrefreplace", + newline='\n') if self.nodeType == Node.DOCUMENT_NODE: # Can pass encoding only to document, to put it into XML header self.writexml(writer, "", indent, newl, encoding) else: self.writexml(writer, "", indent, newl) if encoding is None: - return writer.stream.getvalue().decode(use_encoding) + return writer.getvalue() else: - return writer.stream.getvalue() + return writer.detach().getvalue() def hasChildNodes(self): - if self.childNodes: - return True - else: - return False + return bool(self.childNodes) def _get_childNodes(self): return self.childNodes @@ -286,10 +285,10 @@ def _append_child(self, node): childNodes = self.childNodes if childNodes: last = childNodes[-1] - node.__dict__["previousSibling"] = last - last.__dict__["nextSibling"] = node + node.previousSibling = last + last.nextSibling = node childNodes.append(node) - node.__dict__["parentNode"] = self + node.parentNode = self def _in_document(node): # return True iff node is part of a document tree @@ -342,9 +341,10 @@ class DocumentFragment(Node): class Attr(Node): + __slots__=('_name', '_value', 'namespaceURI', + '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement') nodeType = Node.ATTRIBUTE_NODE attributes = None - ownerElement = None specified = False _is_id = False @@ -352,12 +352,11 @@ class Attr(Node): def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, prefix=None): - # skip setattr for performance - d = self.__dict__ - d["nodeName"] = d["name"] = qName - d["namespaceURI"] = namespaceURI - d["prefix"] = prefix - d['childNodes'] = NodeList() + self.ownerElement = None + self._name = qName + self.namespaceURI = namespaceURI + self._prefix = prefix + self.childNodes = NodeList() # Add the single child node that represents the value of the attr self.childNodes.append(Text()) @@ -365,9 +364,10 @@ class Attr(Node): # nodeValue and value are set elsewhere def _get_localName(self): - if 'localName' in self.__dict__: - return self.__dict__['localName'] - return self.nodeName.split(":", 1)[-1] + try: + return self._localName + except AttributeError: + return self.nodeName.split(":", 1)[-1] def _get_name(self): return self.name @@ -375,20 +375,30 @@ class Attr(Node): def _get_specified(self): return self.specified - def __setattr__(self, name, value): - d = self.__dict__ - if name in ("value", "nodeValue"): - d["value"] = d["nodeValue"] = value - d2 = self.childNodes[0].__dict__ - d2["data"] = d2["nodeValue"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - elif name in ("name", "nodeName"): - d["name"] = d["nodeName"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - else: - d[name] = value + def _get_name(self): + return self._name + + def _set_name(self, value): + self._name = value + if self.ownerElement is not None: + _clear_id_cache(self.ownerElement) + + nodeName = name = property(_get_name, _set_name) + + def _get_value(self): + return self._value + + def _set_value(self, value): + self._value = value + self.childNodes[0].data = value + if self.ownerElement is not None: + _clear_id_cache(self.ownerElement) + self.childNodes[0].data = value + + nodeValue = value = property(_get_value, _set_value) + + def _get_prefix(self): + return self._prefix def _set_prefix(self, prefix): nsuri = self.namespaceURI @@ -396,22 +406,16 @@ class Attr(Node): if nsuri and nsuri != XMLNS_NAMESPACE: raise xml.dom.NamespaceErr( "illegal use of 'xmlns' prefix for the wrong namespace") - d = self.__dict__ - d['prefix'] = prefix + self._prefix = prefix if prefix is None: newName = self.localName else: newName = "%s:%s" % (prefix, self.localName) if self.ownerElement: _clear_id_cache(self.ownerElement) - d['nodeName'] = d['name'] = newName + self.name = newName - def _set_value(self, value): - d = self.__dict__ - d['value'] = d['nodeValue'] = value - if self.ownerElement: - _clear_id_cache(self.ownerElement) - self.childNodes[0].data = value + prefix = property(_get_prefix, _set_prefix) def unlink(self): # This implementation does not call the base implementation @@ -586,8 +590,8 @@ class NamedNodeMap(object): _clear_id_cache(self._ownerElement) del self._attrs[n.nodeName] del self._attrsNS[(n.namespaceURI, n.localName)] - if 'ownerElement' in n.__dict__: - n.__dict__['ownerElement'] = None + if hasattr(n, 'ownerElement'): + n.ownerElement = None return n else: raise xml.dom.NotFoundErr() @@ -598,8 +602,8 @@ class NamedNodeMap(object): _clear_id_cache(self._ownerElement) del self._attrsNS[(n.namespaceURI, n.localName)] del self._attrs[n.nodeName] - if 'ownerElement' in n.__dict__: - n.__dict__['ownerElement'] = None + if hasattr(n, 'ownerElement'): + n.ownerElement = None return n else: raise xml.dom.NotFoundErr() @@ -659,6 +663,9 @@ class TypeInfo(object): _no_type = TypeInfo(None, None) class Element(Node): + __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix', + 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS', + 'nextSibling', 'previousSibling') nodeType = Node.ELEMENT_NODE nodeValue = None schemaType = _no_type @@ -674,41 +681,57 @@ class Element(Node): def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, localName=None): + self.parentNode = None self.tagName = self.nodeName = tagName self.prefix = prefix self.namespaceURI = namespaceURI self.childNodes = NodeList() + self.nextSibling = self.previousSibling = None + + # Attribute dictionaries are lazily created + # attributes are double-indexed: + # tagName -> Attribute + # URI,localName -> Attribute + # in the future: consider lazy generation + # of attribute objects this is too tricky + # for now because of headaches with + # namespaces. + self._attrs = None + self._attrsNS = None - self._attrs = {} # attributes are double-indexed: - self._attrsNS = {} # tagName -> Attribute - # URI,localName -> Attribute - # in the future: consider lazy generation - # of attribute objects this is too tricky - # for now because of headaches with - # namespaces. + def _ensure_attributes(self): + if self._attrs is None: + self._attrs = {} + self._attrsNS = {} def _get_localName(self): - if 'localName' in self.__dict__: - return self.__dict__['localName'] - return self.tagName.split(":", 1)[-1] + try: + return self._localName + except AttributeError: + return self.tagName.split(":", 1)[-1] def _get_tagName(self): return self.tagName def unlink(self): - for attr in list(self._attrs.values()): - attr.unlink() + if self._attrs is not None: + for attr in list(self._attrs.values()): + attr.unlink() self._attrs = None self._attrsNS = None Node.unlink(self) def getAttribute(self, attname): + if self._attrs is None: + return "" try: return self._attrs[attname].value except KeyError: return "" def getAttributeNS(self, namespaceURI, localName): + if self._attrsNS is None: + return "" try: return self._attrsNS[(namespaceURI, localName)].value except KeyError: @@ -718,14 +741,11 @@ class Element(Node): attr = self.getAttributeNode(attname) if attr is None: attr = Attr(attname) - # for performance - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument + attr.value = value # also sets nodeValue + attr.ownerDocument = self.ownerDocument self.setAttributeNode(attr) elif value != attr.value: - d = attr.__dict__ - d["value"] = d["nodeValue"] = value + attr.value = value if attr.isId: _clear_id_cache(self) @@ -733,33 +753,33 @@ class Element(Node): prefix, localname = _nssplit(qualifiedName) attr = self.getAttributeNodeNS(namespaceURI, localname) if attr is None: - # for performance attr = Attr(qualifiedName, namespaceURI, localname, prefix) - d = attr.__dict__ - d["prefix"] = prefix - d["nodeName"] = qualifiedName - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument + attr.value = value + attr.ownerDocument = self.ownerDocument self.setAttributeNode(attr) else: - d = attr.__dict__ if value != attr.value: - d["value"] = d["nodeValue"] = value + attr.value = value if attr.isId: _clear_id_cache(self) if attr.prefix != prefix: - d["prefix"] = prefix - d["nodeName"] = qualifiedName + attr.prefix = prefix + attr.nodeName = qualifiedName def getAttributeNode(self, attrname): + if self._attrs is None: + return None return self._attrs.get(attrname) def getAttributeNodeNS(self, namespaceURI, localName): + if self._attrsNS is None: + return None return self._attrsNS.get((namespaceURI, localName)) def setAttributeNode(self, attr): if attr.ownerElement not in (None, self): raise xml.dom.InuseAttributeErr("attribute node already owned") + self._ensure_attributes() old1 = self._attrs.get(attr.name, None) if old1 is not None: self.removeAttributeNode(old1) @@ -778,6 +798,8 @@ class Element(Node): setAttributeNodeNS = setAttributeNode def removeAttribute(self, name): + if self._attrsNS is None: + raise xml.dom.NotFoundErr() try: attr = self._attrs[name] except KeyError: @@ -785,6 +807,8 @@ class Element(Node): self.removeAttributeNode(attr) def removeAttributeNS(self, namespaceURI, localName): + if self._attrsNS is None: + raise xml.dom.NotFoundErr() try: attr = self._attrsNS[(namespaceURI, localName)] except KeyError: @@ -807,9 +831,13 @@ class Element(Node): removeAttributeNodeNS = removeAttributeNode def hasAttribute(self, name): + if self._attrs is None: + return False return name in self._attrs def hasAttributeNS(self, namespaceURI, localName): + if self._attrsNS is None: + return False return (namespaceURI, localName) in self._attrsNS def getElementsByTagName(self, name): @@ -850,6 +878,7 @@ class Element(Node): writer.write("/>%s"%(newl)) def _get_attributes(self): + self._ensure_attributes() return NamedNodeMap(self._attrs, self._attrsNS, self) def hasAttributes(self): @@ -874,7 +903,7 @@ class Element(Node): if _get_containing_entref(self) is not None: raise xml.dom.NoModificationAllowedErr() if not idAttr._is_id: - idAttr.__dict__['_is_id'] = True + idAttr._is_id = True self._magic_id_nodes += 1 self.ownerDocument._magic_id_count += 1 _clear_id_cache(self) @@ -887,19 +916,20 @@ defproperty(Element, "localName", def _set_attribute_node(element, attr): _clear_id_cache(element) + element._ensure_attributes() element._attrs[attr.name] = attr element._attrsNS[(attr.namespaceURI, attr.localName)] = attr # This creates a circular reference, but Element.unlink() # breaks the cycle since the references to the attribute # dictionaries are tossed. - attr.__dict__['ownerElement'] = element - + attr.ownerElement = element class Childless: """Mixin that makes childless-ness easy to implement and avoids the complexity of the Node methods that deal with children. """ + __slots__ = () attributes = None childNodes = EmptyNodeList() @@ -938,54 +968,49 @@ class Childless: class ProcessingInstruction(Childless, Node): nodeType = Node.PROCESSING_INSTRUCTION_NODE + __slots__ = ('target', 'data') def __init__(self, target, data): - self.target = self.nodeName = target - self.data = self.nodeValue = data + self.target = target + self.data = data - def _get_data(self): + # nodeValue is an alias for data + def _get_nodeValue(self): return self.data - def _set_data(self, value): - d = self.__dict__ - d['data'] = d['nodeValue'] = value + def _set_nodeValue(self, value): + self.data = data + nodeValue = property(_get_nodeValue, _set_nodeValue) - def _get_target(self): + # nodeName is an alias for target + def _get_nodeName(self): return self.target - def _set_target(self, value): - d = self.__dict__ - d['target'] = d['nodeName'] = value - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - elif name == "target" or name == "nodeName": - self.__dict__['target'] = self.__dict__['nodeName'] = value - else: - self.__dict__[name] = value + def _set_nodeName(self, value): + self.target = value + nodeName = property(_get_nodeName, _set_nodeName) def writexml(self, writer, indent="", addindent="", newl=""): writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) class CharacterData(Childless, Node): + __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling') + + def __init__(self): + self.ownerDocument = self.parentNode = None + self.previousSibling = self.nextSibling = None + self._data = '' + Node.__init__(self) + def _get_length(self): return len(self.data) __len__ = _get_length def _get_data(self): - return self.__dict__['data'] + return self._data def _set_data(self, data): - d = self.__dict__ - d['data'] = d['nodeValue'] = data - - _get_nodeValue = _get_data - _set_nodeValue = _set_data + self._data = data - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - else: - self.__dict__[name] = value + data = nodeValue = property(_get_data, _set_data) def __repr__(self): data = self.data @@ -1042,10 +1067,7 @@ defproperty(CharacterData, "length", doc="Length of the string data.") class Text(CharacterData): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, CharacterData is an old-style class - # __slots__ = () + __slots__ = () nodeType = Node.TEXT_NODE nodeName = "#text" @@ -1112,9 +1134,7 @@ class Text(CharacterData): else: break if content: - d = self.__dict__ - d['data'] = content - d['nodeValue'] = content + self.data = content return self else: return None @@ -1160,7 +1180,8 @@ class Comment(CharacterData): nodeName = "#comment" def __init__(self, data): - self.data = self.nodeValue = data + CharacterData.__init__(self) + self._data = data def writexml(self, writer, indent="", addindent="", newl=""): if "--" in self.data: @@ -1169,10 +1190,7 @@ class Comment(CharacterData): class CDATASection(Text): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, Text is an old-style class - # __slots__ = () + __slots__ = () nodeType = Node.CDATA_SECTION_NODE nodeName = "#cdata-section" @@ -1252,8 +1270,7 @@ defproperty(ReadOnlySequentialNamedNodeMap, "length", class Identified: """Mix-in class that supports the publicId and systemId attributes.""" - # XXX this does not work, this is an old-style class - # __slots__ = 'publicId', 'systemId' + __slots__ = 'publicId', 'systemId' def _identified_mixin_init(self, publicId, systemId): self.publicId = publicId @@ -1504,18 +1521,19 @@ def _clear_id_cache(node): node.ownerDocument._id_search_stack= None class Document(Node, DocumentLS): + __slots__ = ('_elem_info', 'doctype', + '_id_search_stack', 'childNodes', '_id_cache') _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) + implementation = DOMImplementation() nodeType = Node.DOCUMENT_NODE nodeName = "#document" nodeValue = None attributes = None - doctype = None parentNode = None previousSibling = nextSibling = None - implementation = DOMImplementation() # Document attributes from Level 3 (WD 9 April 2002) @@ -1530,6 +1548,7 @@ class Document(Node, DocumentLS): _magic_id_count = 0 def __init__(self): + self.doctype = None self.childNodes = NodeList() # mapping of (namespaceURI, localName) -> ElementInfo # and tagName -> ElementInfo @@ -1771,12 +1790,12 @@ class Document(Node, DocumentLS): raise xml.dom.NotSupportedErr("cannot import document type nodes") return _clone_node(node, deep, self) - def writexml(self, writer, indent="", addindent="", newl="", - encoding = None): + def writexml(self, writer, indent="", addindent="", newl="", encoding=None): if encoding is None: writer.write('<?xml version="1.0" ?>'+newl) else: - writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) + writer.write('<?xml version="1.0" encoding="%s"?>%s' % ( + encoding, newl)) for node in self.childNodes: node.writexml(writer, indent, addindent, newl) @@ -1815,17 +1834,15 @@ class Document(Node, DocumentLS): element.removeAttributeNode(n) else: element = None - # avoid __setattr__ - d = n.__dict__ - d['prefix'] = prefix - d['localName'] = localName - d['namespaceURI'] = namespaceURI - d['nodeName'] = name + n.prefix = prefix + n._localName = localName + n.namespaceURI = namespaceURI + n.nodeName = name if n.nodeType == Node.ELEMENT_NODE: - d['tagName'] = name + n.tagName = name else: # attribute node - d['name'] = name + n.name = name if element is not None: element.setAttributeNode(n) if is_id: diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py index d5ac8b2..43504f7 100644 --- a/Lib/xml/dom/pulldom.py +++ b/Lib/xml/dom/pulldom.py @@ -1,6 +1,5 @@ import xml.sax import xml.sax.handler -import types START_ELEMENT = "START_ELEMENT" END_ELEMENT = "END_ELEMENT" @@ -334,10 +333,7 @@ def parse(stream_or_string, parser=None, bufsize=None): return DOMEventStream(stream, parser, bufsize) def parseString(string, parser=None): - try: - from io import StringIO - except ImportError: - from io import StringIO + from io import StringIO bufsize = len(string) buf = StringIO(string) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 9742f32..9553c51 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -68,8 +68,9 @@ __all__ = [ "tostring", "tostringlist", "TreeBuilder", "VERSION", - "XML", + "XML", "XMLID", "XMLParser", "XMLTreeBuilder", + "register_namespace", ] VERSION = "1.3.0" @@ -99,34 +100,11 @@ VERSION = "1.3.0" import sys import re import warnings +import io +import contextlib +from . import ElementPath -class _SimpleElementPath: - # emulate pre-1.2 find/findtext/findall behaviour - def find(self, element, tag, namespaces=None): - for elem in element: - if elem.tag == tag: - return elem - return None - def findtext(self, element, tag, default=None, namespaces=None): - elem = self.find(element, tag) - if elem is None: - return default - return elem.text or "" - def iterfind(self, element, tag, namespaces=None): - if tag[:3] == ".//": - for elem in element.iter(tag[3:]): - yield elem - for elem in element: - if elem.tag == tag: - yield elem - def findall(self, element, tag, namespaces=None): - return list(self.iterfind(element, tag, namespaces)) - -try: - from . import ElementPath -except ImportError: - ElementPath = _SimpleElementPath() ## # Parser error. This is a subclass of <b>SyntaxError</b>. @@ -148,9 +126,9 @@ class ParseError(SyntaxError): # @defreturn flag def iselement(element): - # FIXME: not sure about this; might be a better idea to look - # for tag/attrib/text attributes - return isinstance(element, Element) or hasattr(element, "tag") + # FIXME: not sure about this; + # isinstance(element, Element) or look for tag/attrib/text attributes + return hasattr(element, 'tag') ## # Element class. This class defines the Element interface, and @@ -205,6 +183,9 @@ class Element: # constructor def __init__(self, tag, attrib={}, **extra): + if not isinstance(attrib, dict): + raise TypeError("attrib must be dict, not %s" % ( + attrib.__class__.__name__,)) attrib = attrib.copy() attrib.update(extra) self.tag = tag @@ -298,7 +279,7 @@ class Element: # @param element The element to add. def append(self, element): - # assert iselement(element) + self._assert_is_element(element) self._children.append(element) ## @@ -308,8 +289,8 @@ class Element: # @since 1.3 def extend(self, elements): - # for element in elements: - # assert iselement(element) + for element in elements: + self._assert_is_element(element) self._children.extend(elements) ## @@ -318,9 +299,15 @@ class Element: # @param index Where to insert the new subelement. def insert(self, index, element): - # assert iselement(element) + self._assert_is_element(element) self._children.insert(index, element) + def _assert_is_element(self, e): + # Need to refer to the actual Python implementation, not the + # shadowing C implementation. + if not isinstance(e, _Element): + raise TypeError('expected an Element, not %s' % type(e).__name__) + ## # Removes a matching subelement. Unlike the <b>find</b> methods, # this method compares elements based on identity, not on tag @@ -809,59 +796,38 @@ class ElementTree: # None for only if not US-ASCII or UTF-8 or Unicode. None is default. def write(self, file_or_filename, - # keyword arguments encoding=None, xml_declaration=None, default_namespace=None, method=None): - # assert self._root is not None if not method: method = "xml" elif method not in _serialize: - # FIXME: raise an ImportError for c14n if ElementC14N is missing? raise ValueError("unknown method %r" % method) if not encoding: if method == "c14n": encoding = "utf-8" else: encoding = "us-ascii" - elif encoding == str: # lxml.etree compatibility. - encoding = "unicode" else: encoding = encoding.lower() - if hasattr(file_or_filename, "write"): - file = file_or_filename - else: - if encoding != "unicode": - file = open(file_or_filename, "wb") + with _get_writer(file_or_filename, encoding) as write: + if method == "xml" and (xml_declaration or + (xml_declaration is None and + encoding not in ("utf-8", "us-ascii", "unicode"))): + declared_encoding = encoding + if encoding == "unicode": + # Retrieve the default encoding for the xml declaration + import locale + declared_encoding = locale.getpreferredencoding() + write("<?xml version='1.0' encoding='%s'?>\n" % ( + declared_encoding,)) + if method == "text": + _serialize_text(write, self._root) else: - file = open(file_or_filename, "w") - if encoding != "unicode": - def write(text): - try: - return file.write(text.encode(encoding, - "xmlcharrefreplace")) - except (TypeError, AttributeError): - _raise_serialization_error(text) - else: - write = file.write - if method == "xml" and (xml_declaration or - (xml_declaration is None and - encoding not in ("utf-8", "us-ascii", "unicode"))): - declared_encoding = encoding - if encoding == "unicode": - # Retrieve the default encoding for the xml declaration - import locale - declared_encoding = locale.getpreferredencoding() - write("<?xml version='1.0' encoding='%s'?>\n" % declared_encoding) - if method == "text": - _serialize_text(write, self._root) - else: - qnames, namespaces = _namespaces(self._root, default_namespace) - serialize = _serialize[method] - serialize(write, self._root, qnames, namespaces) - if file_or_filename is not file: - file.close() + qnames, namespaces = _namespaces(self._root, default_namespace) + serialize = _serialize[method] + serialize(write, self._root, qnames, namespaces) def write_c14n(self, file): # lxml.etree compatibility. use output method instead @@ -870,6 +836,58 @@ class ElementTree: # -------------------------------------------------------------------- # serialization support +@contextlib.contextmanager +def _get_writer(file_or_filename, encoding): + # returns text write method and release all resourses after using + try: + write = file_or_filename.write + except AttributeError: + # file_or_filename is a file name + if encoding == "unicode": + file = open(file_or_filename, "w") + else: + file = open(file_or_filename, "w", encoding=encoding, + errors="xmlcharrefreplace") + with file: + yield file.write + else: + # file_or_filename is a file-like object + # encoding determines if it is a text or binary writer + if encoding == "unicode": + # use a text writer as is + yield write + else: + # wrap a binary writer with TextIOWrapper + with contextlib.ExitStack() as stack: + if isinstance(file_or_filename, io.BufferedIOBase): + file = file_or_filename + elif isinstance(file_or_filename, io.RawIOBase): + file = io.BufferedWriter(file_or_filename) + # Keep the original file open when the BufferedWriter is + # destroyed + stack.callback(file.detach) + else: + # This is to handle passed objects that aren't in the + # IOBase hierarchy, but just have a write method + file = io.BufferedIOBase() + file.writable = lambda: True + file.write = write + try: + # TextIOWrapper uses this methods to determine + # if BOM (for UTF-16, etc) should be added + file.seekable = file_or_filename.seekable + file.tell = file_or_filename.tell + except AttributeError: + pass + file = io.TextIOWrapper(file, + encoding=encoding, + errors="xmlcharrefreplace", + newline="\n") + # Keep the original file open when the TextIOWrapper is + # destroyed + stack.callback(file.detach) + yield file.write + def _namespaces(elem, default_namespace=None): # identify namespaces used in this tree @@ -909,11 +927,7 @@ def _namespaces(elem, default_namespace=None): _raise_serialization_error(qname) # populate qname and namespaces table - try: - iterate = elem.iter - except AttributeError: - iterate = elem.getiterator # cET compatibility - for elem in iterate(): + for elem in elem.iter(): tag = elem.tag if isinstance(tag, QName): if tag.text not in qnames: @@ -1085,6 +1099,8 @@ _namespace_map = { # dublin core "http://purl.org/dc/elements/1.1/": "dc", } +# For tests and troubleshooting +register_namespace._namespace_map = _namespace_map def _raise_serialization_error(text): raise TypeError( @@ -1153,22 +1169,13 @@ def _escape_attrib_html(text): # @defreturn string def tostring(element, encoding=None, method=None): - class dummy: - pass - data = [] - file = dummy() - file.write = data.append - ElementTree(element).write(file, encoding, method=method) - if encoding in (str, "unicode"): - return "".join(data) - else: - return b"".join(data) + stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() + ElementTree(element).write(stream, encoding, method=method) + return stream.getvalue() ## # Generates a string representation of an XML element, including all -# subelements. If encoding is False, the string is returned as a -# sequence of string fragments; otherwise it is a sequence of -# bytestrings. +# subelements. # # @param element An Element instance. # @keyparam encoding Optional output encoding (default is US-ASCII). @@ -1179,15 +1186,29 @@ def tostring(element, encoding=None, method=None): # @defreturn sequence # @since 1.3 +class _ListDataStream(io.BufferedIOBase): + """ An auxiliary stream accumulating into a list reference + """ + def __init__(self, lst): + self.lst = lst + + def writable(self): + return True + + def seekable(self): + return True + + def write(self, b): + self.lst.append(b) + + def tell(self): + return len(self.lst) + def tostringlist(element, encoding=None, method=None): - class dummy: - pass - data = [] - file = dummy() - file.write = data.append - ElementTree(element).write(file, encoding, method=method) - # FIXME: merge small fragments into larger parts - return data + lst = [] + stream = _ListDataStream(lst) + ElementTree(element).write(stream, encoding, method=method) + return lst ## # Writes an element tree or element structure to sys.stdout. This @@ -1509,24 +1530,30 @@ class XMLParser: self.target = self._target = target self._error = expat.error self._names = {} # name memo cache - # callbacks + # main callbacks parser.DefaultHandlerExpand = self._default - parser.StartElementHandler = self._start - parser.EndElementHandler = self._end - parser.CharacterDataHandler = self._data - # optional callbacks - parser.CommentHandler = self._comment - parser.ProcessingInstructionHandler = self._pi + if hasattr(target, 'start'): + parser.StartElementHandler = self._start + if hasattr(target, 'end'): + parser.EndElementHandler = self._end + if hasattr(target, 'data'): + parser.CharacterDataHandler = target.data + # miscellaneous callbacks + if hasattr(target, 'comment'): + parser.CommentHandler = target.comment + if hasattr(target, 'pi'): + parser.ProcessingInstructionHandler = target.pi # let expat do the buffering, if supported try: - self._parser.buffer_text = 1 + parser.buffer_text = 1 except AttributeError: pass # use new-style attribute handling, if supported try: - self._parser.ordered_attributes = 1 - self._parser.specified_attributes = 1 - parser.StartElementHandler = self._start_list + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + if hasattr(target, 'start'): + parser.StartElementHandler = self._start_list except AttributeError: pass self._doctype = None @@ -1570,44 +1597,29 @@ class XMLParser: attrib[fixname(attrib_in[i])] = attrib_in[i+1] return self.target.start(tag, attrib) - def _data(self, text): - return self.target.data(text) - def _end(self, tag): return self.target.end(self._fixname(tag)) - def _comment(self, data): - try: - comment = self.target.comment - except AttributeError: - pass - else: - return comment(data) - - def _pi(self, target, data): - try: - pi = self.target.pi - except AttributeError: - pass - else: - return pi(target, data) - def _default(self, text): prefix = text[:1] if prefix == "&": # deal with undefined entities try: - self.target.data(self.entity[text[1:-1]]) + data_handler = self.target.data + except AttributeError: + return + try: + data_handler(self.entity[text[1:-1]]) except KeyError: from xml.parsers import expat err = expat.error( "undefined entity %s: line %d, column %d" % - (text, self._parser.ErrorLineNumber, - self._parser.ErrorColumnNumber) + (text, self.parser.ErrorLineNumber, + self.parser.ErrorColumnNumber) ) err.code = 11 # XML_ERROR_UNDEFINED_ENTITY - err.lineno = self._parser.ErrorLineNumber - err.offset = self._parser.ErrorColumnNumber + err.lineno = self.parser.ErrorLineNumber + err.offset = self.parser.ErrorColumnNumber raise err elif prefix == "<" and text[:9] == "<!DOCTYPE": self._doctype = [] # inside a doctype declaration @@ -1625,16 +1637,16 @@ class XMLParser: type = self._doctype[1] if type == "PUBLIC" and n == 4: name, type, pubid, system = self._doctype + if pubid: + pubid = pubid[1:-1] elif type == "SYSTEM" and n == 3: name, type, system = self._doctype pubid = None else: return - if pubid: - pubid = pubid[1:-1] if hasattr(self.target, "doctype"): self.target.doctype(name, pubid, system[1:-1]) - elif self.doctype is not self._XMLParser__doctype: + elif self.doctype != self._XMLParser__doctype: # warn about deprecated call self._XMLParser__doctype(name, pubid, system[1:-1]) self.doctype(name, pubid, system[1:-1]) @@ -1665,7 +1677,7 @@ class XMLParser: def feed(self, data): try: - self._parser.Parse(data, 0) + self.parser.Parse(data, 0) except self._error as v: self._raiseerror(v) @@ -1677,12 +1689,100 @@ class XMLParser: def close(self): try: - self._parser.Parse("", 1) # end of data + self.parser.Parse("", 1) # end of data except self._error as v: self._raiseerror(v) - tree = self.target.close() - del self.target, self._parser # get rid of circular references - return tree + try: + close_handler = self.target.close + except AttributeError: + pass + else: + return close_handler() + finally: + # get rid of circular references + del self.parser, self._parser + del self.target, self._target + + +# Import the C accelerators +try: + # Element, SubElement, ParseError, TreeBuilder, XMLParser + from _elementtree import * +except ImportError: + pass +else: + # Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser + + class ElementTree(ElementTree): + def parse(self, source, parser=None): + close_source = False + if not hasattr(source, 'read'): + source = open(source, 'rb') + close_source = True + try: + if parser is not None: + while True: + data = source.read(65536) + if not data: + break + parser.feed(data) + self._root = parser.close() + else: + parser = XMLParser() + self._root = parser._parse(source) + return self._root + finally: + if close_source: + source.close() + + class iterparse: + root = None + def __init__(self, file, events=None): + self._close_file = False + if not hasattr(file, 'read'): + file = open(file, 'rb') + self._close_file = True + self._file = file + self._events = [] + self._index = 0 + self._error = None + self.root = self._root = None + b = TreeBuilder() + self._parser = XMLParser(b) + self._parser._setevents(self._events, events) + + def __next__(self): + while True: + try: + item = self._events[self._index] + self._index += 1 + return item + except IndexError: + pass + if self._error: + e = self._error + self._error = None + raise e + if self._parser is None: + self.root = self._root + if self._close_file: + self._file.close() + raise StopIteration + # load event buffer + del self._events[:] + self._index = 0 + data = self._file.read(16384) + if data: + try: + self._parser.feed(data) + except SyntaxError as exc: + self._error = exc + else: + self._root = self._parser.close() + self._parser = None + + def __iter__(self): + return self # compatibility XMLTreeBuilder = XMLParser diff --git a/Lib/xml/etree/cElementTree.py b/Lib/xml/etree/cElementTree.py index a6f127a..368e679 100644 --- a/Lib/xml/etree/cElementTree.py +++ b/Lib/xml/etree/cElementTree.py @@ -1,3 +1,3 @@ -# Wrapper module for _elementtree +# Deprecated alias for xml.etree.ElementTree -from _elementtree import * +from xml.etree.ElementTree import * diff --git a/Lib/xml/parsers/expat.py b/Lib/xml/parsers/expat.py index a805b82..bcbe9fb 100644 --- a/Lib/xml/parsers/expat.py +++ b/Lib/xml/parsers/expat.py @@ -1,6 +1,4 @@ """Interface to the Expat non-validating XML parser.""" -__version__ = '$Revision$' - import sys from pyexpat import * |