summary | refs | log | tree | commit | diff | stats
path: root/Lib/xml
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/xml')
-rw-r--r-- Lib/xml/dom/__init__.py 1
-rw-r--r-- Lib/xml/dom/domreg.py 2
-rw-r--r-- Lib/xml/dom/expatbuilder.py 42
-rw-r--r-- Lib/xml/dom/minidom.py 293
-rw-r--r-- Lib/xml/dom/pulldom.py 6
-rw-r--r-- Lib/xml/etree/ElementTree.py 380
-rw-r--r-- Lib/xml/etree/cElementTree.py 4
-rw-r--r-- Lib/xml/parsers/expat.py 2
8 files changed, 413 insertions, 317 deletions
diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py
index 4401bdf..97cf9a6 100644
--- a/Lib/xml/dom/__init__.py
+++ b/Lib/xml/dom/__init__.py
@@ -17,6 +17,7 @@ pulldom -- DOM builder supporting on-demand tree-building for selected
class Node:
"""Class giving the NodeType constants."""
+ __slots__ = ()
# DOM implementations may use this as a base class for their own
# Node implementations. If they don't, the constants defined here
diff --git a/Lib/xml/dom/domreg.py b/Lib/xml/dom/domreg.py
index cb35bb0..8c3d901 100644
--- a/Lib/xml/dom/domreg.py
+++ b/Lib/xml/dom/domreg.py
@@ -2,8 +2,6 @@
directly. Instead, the functions getDOMImplementation and
registerDOMImplementation should be imported from xml.dom."""
-from xml.dom.minicompat import * # isinstance, StringTypes
-
# This is a list of well-known implementations. Well-known names
# should be published by posting to xml-sig@python.org, and are
# subsequently recorded in this file.
diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py
index a98fe03..f074ab9 100644
--- a/Lib/xml/dom/expatbuilder.py
+++ b/Lib/xml/dom/expatbuilder.py
@@ -33,8 +33,6 @@ from xml.parsers import expat
from xml.dom.minidom import _append_child, _set_attribute_node
from xml.dom.NodeFilter import NodeFilter
-from xml.dom.minicompat import *
-
TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE
@@ -283,27 +281,23 @@ class ExpatBuilder:
elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
node = childNodes[-1]
value = node.data + data
- d = node.__dict__
- d['data'] = d['nodeValue'] = value
+ node.data = value
return
else:
node = minidom.Text()
- d = node.__dict__
- d['data'] = d['nodeValue'] = data
- d['ownerDocument'] = self.document
+ node.data = data
+ node.ownerDocument = self.document
_append_child(self.curNode, node)
def character_data_handler(self, data):
childNodes = self.curNode.childNodes
if childNodes and childNodes[-1].nodeType == TEXT_NODE:
node = childNodes[-1]
- d = node.__dict__
- d['data'] = d['nodeValue'] = node.data + data
+ node.data = node.data + data
return
node = minidom.Text()
- d = node.__dict__
- d['data'] = d['nodeValue'] = node.data + data
- d['ownerDocument'] = self.document
+ node.data = node.data + data
+ node.ownerDocument = self.document
_append_child(self.curNode, node)
def entity_decl_handler(self, entityName, is_parameter_entity, value,
@@ -363,11 +357,8 @@ class ExpatBuilder:
a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
None, EMPTY_PREFIX)
value = attributes[i+1]
- d = a.childNodes[0].__dict__
- d['data'] = d['nodeValue'] = value
- d = a.__dict__
- d['value'] = d['nodeValue'] = value
- d['ownerDocument'] = self.document
+ a.value = value
+ a.ownerDocument = self.document
_set_attribute_node(node, a)
if node is not self.document.documentElement:
@@ -761,15 +752,13 @@ class Namespaces:
else:
a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
"xmlns", EMPTY_PREFIX)
- d = a.childNodes[0].__dict__
- d['data'] = d['nodeValue'] = uri
- d = a.__dict__
- d['value'] = d['nodeValue'] = uri
- d['ownerDocument'] = self.document
+ a.value = uri
+ a.ownerDocument = self.document
_set_attribute_node(node, a)
del self._ns_ordered_prefixes[:]
if attributes:
+ node._ensure_attributes()
_attrs = node._attrs
_attrsNS = node._attrsNS
for i in range(0, len(attributes), 2):
@@ -785,12 +774,9 @@ class Namespaces:
aname, EMPTY_PREFIX)
_attrs[aname] = a
_attrsNS[(EMPTY_NAMESPACE, aname)] = a
- d = a.childNodes[0].__dict__
- d['data'] = d['nodeValue'] = value
- d = a.__dict__
- d['ownerDocument'] = self.document
- d['value'] = d['nodeValue'] = value
- d['ownerElement'] = node
+ a.ownerDocument = self.document
+ a.value = value
+ a.ownerElement = node
if __debug__:
# This only adds some asserts to the original
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index f23ad05..28e5030 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,7 +14,6 @@ Todo:
* SAX 2 namespaces
"""
-import codecs
import io
import xml.dom
@@ -47,25 +46,25 @@ class Node(xml.dom.Node):
return self.toprettyxml("", "", encoding)
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
- # indent = the indentation string to prepend, per level
- # newl = the newline string to append
- use_encoding = "utf-8" if encoding is None else encoding
- writer = codecs.getwriter(use_encoding)(io.BytesIO())
+ if encoding is None:
+ writer = io.StringIO()
+ else:
+ writer = io.TextIOWrapper(io.BytesIO(),
+ encoding=encoding,
+ errors="xmlcharrefreplace",
+ newline='\n')
if self.nodeType == Node.DOCUMENT_NODE:
# Can pass encoding only to document, to put it into XML header
self.writexml(writer, "", indent, newl, encoding)
else:
self.writexml(writer, "", indent, newl)
if encoding is None:
- return writer.stream.getvalue().decode(use_encoding)
+ return writer.getvalue()
else:
- return writer.stream.getvalue()
+ return writer.detach().getvalue()
def hasChildNodes(self):
- if self.childNodes:
- return True
- else:
- return False
+ return bool(self.childNodes)
def _get_childNodes(self):
return self.childNodes
@@ -286,10 +285,10 @@ def _append_child(self, node):
childNodes = self.childNodes
if childNodes:
last = childNodes[-1]
- node.__dict__["previousSibling"] = last
- last.__dict__["nextSibling"] = node
+ node.previousSibling = last
+ last.nextSibling = node
childNodes.append(node)
- node.__dict__["parentNode"] = self
+ node.parentNode = self
def _in_document(node):
# return True iff node is part of a document tree
@@ -342,9 +341,10 @@ class DocumentFragment(Node):
class Attr(Node):
+ __slots__=('_name', '_value', 'namespaceURI',
+ '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
nodeType = Node.ATTRIBUTE_NODE
attributes = None
- ownerElement = None
specified = False
_is_id = False
@@ -352,12 +352,11 @@ class Attr(Node):
def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
prefix=None):
- # skip setattr for performance
- d = self.__dict__
- d["nodeName"] = d["name"] = qName
- d["namespaceURI"] = namespaceURI
- d["prefix"] = prefix
- d['childNodes'] = NodeList()
+ self.ownerElement = None
+ self._name = qName
+ self.namespaceURI = namespaceURI
+ self._prefix = prefix
+ self.childNodes = NodeList()
# Add the single child node that represents the value of the attr
self.childNodes.append(Text())
@@ -365,9 +364,10 @@ class Attr(Node):
# nodeValue and value are set elsewhere
def _get_localName(self):
- if 'localName' in self.__dict__:
- return self.__dict__['localName']
- return self.nodeName.split(":", 1)[-1]
+ try:
+ return self._localName
+ except AttributeError:
+ return self.nodeName.split(":", 1)[-1]
def _get_name(self):
return self.name
@@ -375,20 +375,30 @@ class Attr(Node):
def _get_specified(self):
return self.specified
- def __setattr__(self, name, value):
- d = self.__dict__
- if name in ("value", "nodeValue"):
- d["value"] = d["nodeValue"] = value
- d2 = self.childNodes[0].__dict__
- d2["data"] = d2["nodeValue"] = value
- if self.ownerElement is not None:
- _clear_id_cache(self.ownerElement)
- elif name in ("name", "nodeName"):
- d["name"] = d["nodeName"] = value
- if self.ownerElement is not None:
- _clear_id_cache(self.ownerElement)
- else:
- d[name] = value
+ def _get_name(self):
+ return self._name
+
+ def _set_name(self, value):
+ self._name = value
+ if self.ownerElement is not None:
+ _clear_id_cache(self.ownerElement)
+
+ nodeName = name = property(_get_name, _set_name)
+
+ def _get_value(self):
+ return self._value
+
+ def _set_value(self, value):
+ self._value = value
+ self.childNodes[0].data = value
+ if self.ownerElement is not None:
+ _clear_id_cache(self.ownerElement)
+ self.childNodes[0].data = value
+
+ nodeValue = value = property(_get_value, _set_value)
+
+ def _get_prefix(self):
+ return self._prefix
def _set_prefix(self, prefix):
nsuri = self.namespaceURI
@@ -396,22 +406,16 @@ class Attr(Node):
if nsuri and nsuri != XMLNS_NAMESPACE:
raise xml.dom.NamespaceErr(
"illegal use of 'xmlns' prefix for the wrong namespace")
- d = self.__dict__
- d['prefix'] = prefix
+ self._prefix = prefix
if prefix is None:
newName = self.localName
else:
newName = "%s:%s" % (prefix, self.localName)
if self.ownerElement:
_clear_id_cache(self.ownerElement)
- d['nodeName'] = d['name'] = newName
+ self.name = newName
- def _set_value(self, value):
- d = self.__dict__
- d['value'] = d['nodeValue'] = value
- if self.ownerElement:
- _clear_id_cache(self.ownerElement)
- self.childNodes[0].data = value
+ prefix = property(_get_prefix, _set_prefix)
def unlink(self):
# This implementation does not call the base implementation
@@ -586,8 +590,8 @@ class NamedNodeMap(object):
_clear_id_cache(self._ownerElement)
del self._attrs[n.nodeName]
del self._attrsNS[(n.namespaceURI, n.localName)]
- if 'ownerElement' in n.__dict__:
- n.__dict__['ownerElement'] = None
+ if hasattr(n, 'ownerElement'):
+ n.ownerElement = None
return n
else:
raise xml.dom.NotFoundErr()
@@ -598,8 +602,8 @@ class NamedNodeMap(object):
_clear_id_cache(self._ownerElement)
del self._attrsNS[(n.namespaceURI, n.localName)]
del self._attrs[n.nodeName]
- if 'ownerElement' in n.__dict__:
- n.__dict__['ownerElement'] = None
+ if hasattr(n, 'ownerElement'):
+ n.ownerElement = None
return n
else:
raise xml.dom.NotFoundErr()
@@ -659,6 +663,9 @@ class TypeInfo(object):
_no_type = TypeInfo(None, None)
class Element(Node):
+ __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
+ 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
+ 'nextSibling', 'previousSibling')
nodeType = Node.ELEMENT_NODE
nodeValue = None
schemaType = _no_type
@@ -674,41 +681,57 @@ class Element(Node):
def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
localName=None):
+ self.parentNode = None
self.tagName = self.nodeName = tagName
self.prefix = prefix
self.namespaceURI = namespaceURI
self.childNodes = NodeList()
+ self.nextSibling = self.previousSibling = None
+
+ # Attribute dictionaries are lazily created
+ # attributes are double-indexed:
+ # tagName -> Attribute
+ # URI,localName -> Attribute
+ # in the future: consider lazy generation
+ # of attribute objects this is too tricky
+ # for now because of headaches with
+ # namespaces.
+ self._attrs = None
+ self._attrsNS = None
- self._attrs = {} # attributes are double-indexed:
- self._attrsNS = {} # tagName -> Attribute
- # URI,localName -> Attribute
- # in the future: consider lazy generation
- # of attribute objects this is too tricky
- # for now because of headaches with
- # namespaces.
+ def _ensure_attributes(self):
+ if self._attrs is None:
+ self._attrs = {}
+ self._attrsNS = {}
def _get_localName(self):
- if 'localName' in self.__dict__:
- return self.__dict__['localName']
- return self.tagName.split(":", 1)[-1]
+ try:
+ return self._localName
+ except AttributeError:
+ return self.tagName.split(":", 1)[-1]
def _get_tagName(self):
return self.tagName
def unlink(self):
- for attr in list(self._attrs.values()):
- attr.unlink()
+ if self._attrs is not None:
+ for attr in list(self._attrs.values()):
+ attr.unlink()
self._attrs = None
self._attrsNS = None
Node.unlink(self)
def getAttribute(self, attname):
+ if self._attrs is None:
+ return ""
try:
return self._attrs[attname].value
except KeyError:
return ""
def getAttributeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ return ""
try:
return self._attrsNS[(namespaceURI, localName)].value
except KeyError:
@@ -718,14 +741,11 @@ class Element(Node):
attr = self.getAttributeNode(attname)
if attr is None:
attr = Attr(attname)
- # for performance
- d = attr.__dict__
- d["value"] = d["nodeValue"] = value
- d["ownerDocument"] = self.ownerDocument
+ attr.value = value # also sets nodeValue
+ attr.ownerDocument = self.ownerDocument
self.setAttributeNode(attr)
elif value != attr.value:
- d = attr.__dict__
- d["value"] = d["nodeValue"] = value
+ attr.value = value
if attr.isId:
_clear_id_cache(self)
@@ -733,33 +753,33 @@ class Element(Node):
prefix, localname = _nssplit(qualifiedName)
attr = self.getAttributeNodeNS(namespaceURI, localname)
if attr is None:
- # for performance
attr = Attr(qualifiedName, namespaceURI, localname, prefix)
- d = attr.__dict__
- d["prefix"] = prefix
- d["nodeName"] = qualifiedName
- d["value"] = d["nodeValue"] = value
- d["ownerDocument"] = self.ownerDocument
+ attr.value = value
+ attr.ownerDocument = self.ownerDocument
self.setAttributeNode(attr)
else:
- d = attr.__dict__
if value != attr.value:
- d["value"] = d["nodeValue"] = value
+ attr.value = value
if attr.isId:
_clear_id_cache(self)
if attr.prefix != prefix:
- d["prefix"] = prefix
- d["nodeName"] = qualifiedName
+ attr.prefix = prefix
+ attr.nodeName = qualifiedName
def getAttributeNode(self, attrname):
+ if self._attrs is None:
+ return None
return self._attrs.get(attrname)
def getAttributeNodeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ return None
return self._attrsNS.get((namespaceURI, localName))
def setAttributeNode(self, attr):
if attr.ownerElement not in (None, self):
raise xml.dom.InuseAttributeErr("attribute node already owned")
+ self._ensure_attributes()
old1 = self._attrs.get(attr.name, None)
if old1 is not None:
self.removeAttributeNode(old1)
@@ -778,6 +798,8 @@ class Element(Node):
setAttributeNodeNS = setAttributeNode
def removeAttribute(self, name):
+ if self._attrsNS is None:
+ raise xml.dom.NotFoundErr()
try:
attr = self._attrs[name]
except KeyError:
@@ -785,6 +807,8 @@ class Element(Node):
self.removeAttributeNode(attr)
def removeAttributeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ raise xml.dom.NotFoundErr()
try:
attr = self._attrsNS[(namespaceURI, localName)]
except KeyError:
@@ -807,9 +831,13 @@ class Element(Node):
removeAttributeNodeNS = removeAttributeNode
def hasAttribute(self, name):
+ if self._attrs is None:
+ return False
return name in self._attrs
def hasAttributeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ return False
return (namespaceURI, localName) in self._attrsNS
def getElementsByTagName(self, name):
@@ -850,6 +878,7 @@ class Element(Node):
writer.write("/>%s"%(newl))
def _get_attributes(self):
+ self._ensure_attributes()
return NamedNodeMap(self._attrs, self._attrsNS, self)
def hasAttributes(self):
@@ -874,7 +903,7 @@ class Element(Node):
if _get_containing_entref(self) is not None:
raise xml.dom.NoModificationAllowedErr()
if not idAttr._is_id:
- idAttr.__dict__['_is_id'] = True
+ idAttr._is_id = True
self._magic_id_nodes += 1
self.ownerDocument._magic_id_count += 1
_clear_id_cache(self)
@@ -887,19 +916,20 @@ defproperty(Element, "localName",
def _set_attribute_node(element, attr):
_clear_id_cache(element)
+ element._ensure_attributes()
element._attrs[attr.name] = attr
element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
# This creates a circular reference, but Element.unlink()
# breaks the cycle since the references to the attribute
# dictionaries are tossed.
- attr.__dict__['ownerElement'] = element
-
+ attr.ownerElement = element
class Childless:
"""Mixin that makes childless-ness easy to implement and avoids
the complexity of the Node methods that deal with children.
"""
+ __slots__ = ()
attributes = None
childNodes = EmptyNodeList()
@@ -938,54 +968,49 @@ class Childless:
class ProcessingInstruction(Childless, Node):
nodeType = Node.PROCESSING_INSTRUCTION_NODE
+ __slots__ = ('target', 'data')
def __init__(self, target, data):
- self.target = self.nodeName = target
- self.data = self.nodeValue = data
+ self.target = target
+ self.data = data
- def _get_data(self):
+ # nodeValue is an alias for data
+ def _get_nodeValue(self):
return self.data
- def _set_data(self, value):
- d = self.__dict__
- d['data'] = d['nodeValue'] = value
+ def _set_nodeValue(self, value):
+ self.data = value  # nodeValue aliases data, so store the new value there
+ nodeValue = property(_get_nodeValue, _set_nodeValue)
- def _get_target(self):
+ # nodeName is an alias for target
+ def _get_nodeName(self):
return self.target
- def _set_target(self, value):
- d = self.__dict__
- d['target'] = d['nodeName'] = value
-
- def __setattr__(self, name, value):
- if name == "data" or name == "nodeValue":
- self.__dict__['data'] = self.__dict__['nodeValue'] = value
- elif name == "target" or name == "nodeName":
- self.__dict__['target'] = self.__dict__['nodeName'] = value
- else:
- self.__dict__[name] = value
+ def _set_nodeName(self, value):
+ self.target = value
+ nodeName = property(_get_nodeName, _set_nodeName)
def writexml(self, writer, indent="", addindent="", newl=""):
writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
class CharacterData(Childless, Node):
+ __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')
+
+ def __init__(self):
+ self.ownerDocument = self.parentNode = None
+ self.previousSibling = self.nextSibling = None
+ self._data = ''
+ Node.__init__(self)
+
def _get_length(self):
return len(self.data)
__len__ = _get_length
def _get_data(self):
- return self.__dict__['data']
+ return self._data
def _set_data(self, data):
- d = self.__dict__
- d['data'] = d['nodeValue'] = data
-
- _get_nodeValue = _get_data
- _set_nodeValue = _set_data
+ self._data = data
- def __setattr__(self, name, value):
- if name == "data" or name == "nodeValue":
- self.__dict__['data'] = self.__dict__['nodeValue'] = value
- else:
- self.__dict__[name] = value
+ data = nodeValue = property(_get_data, _set_data)
def __repr__(self):
data = self.data
@@ -1042,10 +1067,7 @@ defproperty(CharacterData, "length", doc="Length of the string data.")
class Text(CharacterData):
- # Make sure we don't add an instance __dict__ if we don't already
- # have one, at least when that's possible:
- # XXX this does not work, CharacterData is an old-style class
- # __slots__ = ()
+ __slots__ = ()
nodeType = Node.TEXT_NODE
nodeName = "#text"
@@ -1112,9 +1134,7 @@ class Text(CharacterData):
else:
break
if content:
- d = self.__dict__
- d['data'] = content
- d['nodeValue'] = content
+ self.data = content
return self
else:
return None
@@ -1160,7 +1180,8 @@ class Comment(CharacterData):
nodeName = "#comment"
def __init__(self, data):
- self.data = self.nodeValue = data
+ CharacterData.__init__(self)
+ self._data = data
def writexml(self, writer, indent="", addindent="", newl=""):
if "--" in self.data:
@@ -1169,10 +1190,7 @@ class Comment(CharacterData):
class CDATASection(Text):
- # Make sure we don't add an instance __dict__ if we don't already
- # have one, at least when that's possible:
- # XXX this does not work, Text is an old-style class
- # __slots__ = ()
+ __slots__ = ()
nodeType = Node.CDATA_SECTION_NODE
nodeName = "#cdata-section"
@@ -1252,8 +1270,7 @@ defproperty(ReadOnlySequentialNamedNodeMap, "length",
class Identified:
"""Mix-in class that supports the publicId and systemId attributes."""
- # XXX this does not work, this is an old-style class
- # __slots__ = 'publicId', 'systemId'
+ __slots__ = 'publicId', 'systemId'
def _identified_mixin_init(self, publicId, systemId):
self.publicId = publicId
@@ -1504,18 +1521,19 @@ def _clear_id_cache(node):
node.ownerDocument._id_search_stack= None
class Document(Node, DocumentLS):
+ __slots__ = ('_elem_info', 'doctype',
+ '_id_search_stack', 'childNodes', '_id_cache')
_child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
+ implementation = DOMImplementation()
nodeType = Node.DOCUMENT_NODE
nodeName = "#document"
nodeValue = None
attributes = None
- doctype = None
parentNode = None
previousSibling = nextSibling = None
- implementation = DOMImplementation()
# Document attributes from Level 3 (WD 9 April 2002)
@@ -1530,6 +1548,7 @@ class Document(Node, DocumentLS):
_magic_id_count = 0
def __init__(self):
+ self.doctype = None
self.childNodes = NodeList()
# mapping of (namespaceURI, localName) -> ElementInfo
# and tagName -> ElementInfo
@@ -1771,12 +1790,12 @@ class Document(Node, DocumentLS):
raise xml.dom.NotSupportedErr("cannot import document type nodes")
return _clone_node(node, deep, self)
- def writexml(self, writer, indent="", addindent="", newl="",
- encoding = None):
+ def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
if encoding is None:
writer.write('<?xml version="1.0" ?>'+newl)
else:
- writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
+ writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
+ encoding, newl))
for node in self.childNodes:
node.writexml(writer, indent, addindent, newl)
@@ -1815,17 +1834,15 @@ class Document(Node, DocumentLS):
element.removeAttributeNode(n)
else:
element = None
- # avoid __setattr__
- d = n.__dict__
- d['prefix'] = prefix
- d['localName'] = localName
- d['namespaceURI'] = namespaceURI
- d['nodeName'] = name
+ n.prefix = prefix
+ n._localName = localName
+ n.namespaceURI = namespaceURI
+ n.nodeName = name
if n.nodeType == Node.ELEMENT_NODE:
- d['tagName'] = name
+ n.tagName = name
else:
# attribute node
- d['name'] = name
+ n.name = name
if element is not None:
element.setAttributeNode(n)
if is_id:
diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py
index d5ac8b2..43504f7 100644
--- a/Lib/xml/dom/pulldom.py
+++ b/Lib/xml/dom/pulldom.py
@@ -1,6 +1,5 @@
import xml.sax
import xml.sax.handler
-import types
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
@@ -334,10 +333,7 @@ def parse(stream_or_string, parser=None, bufsize=None):
return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
- try:
- from io import StringIO
- except ImportError:
- from io import StringIO
+ from io import StringIO
bufsize = len(string)
buf = StringIO(string)
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index ff8ff7d..e8e309c 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -68,8 +68,9 @@ __all__ = [
"tostring", "tostringlist",
"TreeBuilder",
"VERSION",
- "XML",
+ "XML", "XMLID",
"XMLParser", "XMLTreeBuilder",
+ "register_namespace",
]
VERSION = "1.3.0"
@@ -99,34 +100,11 @@ VERSION = "1.3.0"
import sys
import re
import warnings
+import io
+import contextlib
+from . import ElementPath
-class _SimpleElementPath:
- # emulate pre-1.2 find/findtext/findall behaviour
- def find(self, element, tag, namespaces=None):
- for elem in element:
- if elem.tag == tag:
- return elem
- return None
- def findtext(self, element, tag, default=None, namespaces=None):
- elem = self.find(element, tag)
- if elem is None:
- return default
- return elem.text or ""
- def iterfind(self, element, tag, namespaces=None):
- if tag[:3] == ".//":
- for elem in element.iter(tag[3:]):
- yield elem
- for elem in element:
- if elem.tag == tag:
- yield elem
- def findall(self, element, tag, namespaces=None):
- return list(self.iterfind(element, tag, namespaces))
-
-try:
- from . import ElementPath
-except ImportError:
- ElementPath = _SimpleElementPath()
##
# Parser error. This is a subclass of <b>SyntaxError</b>.
@@ -148,9 +126,9 @@ class ParseError(SyntaxError):
# @defreturn flag
def iselement(element):
- # FIXME: not sure about this; might be a better idea to look
- # for tag/attrib/text attributes
- return isinstance(element, Element) or hasattr(element, "tag")
+ # FIXME: not sure about this;
+ # isinstance(element, Element) or look for tag/attrib/text attributes
+ return hasattr(element, 'tag')
##
# Element class. This class defines the Element interface, and
@@ -205,6 +183,9 @@ class Element:
# constructor
def __init__(self, tag, attrib={}, **extra):
+ if not isinstance(attrib, dict):
+ raise TypeError("attrib must be dict, not %s" % (
+ attrib.__class__.__name__,))
attrib = attrib.copy()
attrib.update(extra)
self.tag = tag
@@ -298,7 +279,7 @@ class Element:
# @param element The element to add.
def append(self, element):
- # assert iselement(element)
+ self._assert_is_element(element)
self._children.append(element)
##
@@ -308,8 +289,8 @@ class Element:
# @since 1.3
def extend(self, elements):
- # for element in elements:
- # assert iselement(element)
+ for element in elements:
+ self._assert_is_element(element)
self._children.extend(elements)
##
@@ -318,9 +299,15 @@ class Element:
# @param index Where to insert the new subelement.
def insert(self, index, element):
- # assert iselement(element)
+ self._assert_is_element(element)
self._children.insert(index, element)
+ def _assert_is_element(self, e):
+ # Need to refer to the actual Python implementation, not the
+ # shadowing C implementation.
+ if not isinstance(e, _Element):
+ raise TypeError('expected an Element, not %s' % type(e).__name__)
+
##
# Removes a matching subelement. Unlike the <b>find</b> methods,
# this method compares elements based on identity, not on tag
@@ -810,59 +797,38 @@ class ElementTree:
# "c14n"; default is "xml").
def write(self, file_or_filename,
- # keyword arguments
encoding=None,
xml_declaration=None,
default_namespace=None,
method=None):
- # assert self._root is not None
if not method:
method = "xml"
elif method not in _serialize:
- # FIXME: raise an ImportError for c14n if ElementC14N is missing?
raise ValueError("unknown method %r" % method)
if not encoding:
if method == "c14n":
encoding = "utf-8"
else:
encoding = "us-ascii"
- elif encoding == str: # lxml.etree compatibility.
- encoding = "unicode"
else:
encoding = encoding.lower()
- if hasattr(file_or_filename, "write"):
- file = file_or_filename
- else:
- if encoding != "unicode":
- file = open(file_or_filename, "wb")
+ with _get_writer(file_or_filename, encoding) as write:
+ if method == "xml" and (xml_declaration or
+ (xml_declaration is None and
+ encoding not in ("utf-8", "us-ascii", "unicode"))):
+ declared_encoding = encoding
+ if encoding == "unicode":
+ # Retrieve the default encoding for the xml declaration
+ import locale
+ declared_encoding = locale.getpreferredencoding()
+ write("<?xml version='1.0' encoding='%s'?>\n" % (
+ declared_encoding,))
+ if method == "text":
+ _serialize_text(write, self._root)
else:
- file = open(file_or_filename, "w")
- if encoding != "unicode":
- def write(text):
- try:
- return file.write(text.encode(encoding,
- "xmlcharrefreplace"))
- except (TypeError, AttributeError):
- _raise_serialization_error(text)
- else:
- write = file.write
- if method == "xml" and (xml_declaration or
- (xml_declaration is None and
- encoding not in ("utf-8", "us-ascii", "unicode"))):
- declared_encoding = encoding
- if encoding == "unicode":
- # Retrieve the default encoding for the xml declaration
- import locale
- declared_encoding = locale.getpreferredencoding()
- write("<?xml version='1.0' encoding='%s'?>\n" % declared_encoding)
- if method == "text":
- _serialize_text(write, self._root)
- else:
- qnames, namespaces = _namespaces(self._root, default_namespace)
- serialize = _serialize[method]
- serialize(write, self._root, qnames, namespaces)
- if file_or_filename is not file:
- file.close()
+ qnames, namespaces = _namespaces(self._root, default_namespace)
+ serialize = _serialize[method]
+ serialize(write, self._root, qnames, namespaces)
def write_c14n(self, file):
# lxml.etree compatibility. use output method instead
@@ -871,6 +837,58 @@ class ElementTree:
# --------------------------------------------------------------------
# serialization support
+@contextlib.contextmanager
+def _get_writer(file_or_filename, encoding):
+ # returns a text write method and releases all resources after use
+ try:
+ write = file_or_filename.write
+ except AttributeError:
+ # file_or_filename is a file name
+ if encoding == "unicode":
+ file = open(file_or_filename, "w")
+ else:
+ file = open(file_or_filename, "w", encoding=encoding,
+ errors="xmlcharrefreplace")
+ with file:
+ yield file.write
+ else:
+ # file_or_filename is a file-like object
+ # encoding determines if it is a text or binary writer
+ if encoding == "unicode":
+ # use a text writer as is
+ yield write
+ else:
+ # wrap a binary writer with TextIOWrapper
+ with contextlib.ExitStack() as stack:
+ if isinstance(file_or_filename, io.BufferedIOBase):
+ file = file_or_filename
+ elif isinstance(file_or_filename, io.RawIOBase):
+ file = io.BufferedWriter(file_or_filename)
+ # Keep the original file open when the BufferedWriter is
+ # destroyed
+ stack.callback(file.detach)
+ else:
+ # This is to handle passed objects that aren't in the
+ # IOBase hierarchy, but just have a write method
+ file = io.BufferedIOBase()
+ file.writable = lambda: True
+ file.write = write
+ try:
+ # TextIOWrapper uses these methods to determine
+ # if a BOM (for UTF-16, etc.) should be added
+ file.seekable = file_or_filename.seekable
+ file.tell = file_or_filename.tell
+ except AttributeError:
+ pass
+ file = io.TextIOWrapper(file,
+ encoding=encoding,
+ errors="xmlcharrefreplace",
+ newline="\n")
+ # Keep the original file open when the TextIOWrapper is
+ # destroyed
+ stack.callback(file.detach)
+ yield file.write
+
def _namespaces(elem, default_namespace=None):
# identify namespaces used in this tree
@@ -910,11 +928,7 @@ def _namespaces(elem, default_namespace=None):
_raise_serialization_error(qname)
# populate qname and namespaces table
- try:
- iterate = elem.iter
- except AttributeError:
- iterate = elem.getiterator # cET compatibility
- for elem in iterate():
+ for elem in elem.iter():
tag = elem.tag
if isinstance(tag, QName):
if tag.text not in qnames:
@@ -1086,6 +1100,8 @@ _namespace_map = {
# dublin core
"http://purl.org/dc/elements/1.1/": "dc",
}
+# For tests and troubleshooting
+register_namespace._namespace_map = _namespace_map
def _raise_serialization_error(text):
raise TypeError(
@@ -1154,22 +1170,13 @@ def _escape_attrib_html(text):
# @defreturn string
def tostring(element, encoding=None, method=None):
- class dummy:
- pass
- data = []
- file = dummy()
- file.write = data.append
- ElementTree(element).write(file, encoding, method=method)
- if encoding in (str, "unicode"):
- return "".join(data)
- else:
- return b"".join(data)
+ stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
+ ElementTree(element).write(stream, encoding, method=method)
+ return stream.getvalue()
##
# Generates a string representation of an XML element, including all
-# subelements. If encoding is False, the string is returned as a
-# sequence of string fragments; otherwise it is a sequence of
-# bytestrings.
+# subelements.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
@@ -1180,15 +1187,29 @@ def tostring(element, encoding=None, method=None):
# @defreturn sequence
# @since 1.3
+class _ListDataStream(io.BufferedIOBase):
+ """ An auxiliary stream accumulating into a list reference
+ """
+ def __init__(self, lst):
+ self.lst = lst
+
+ def writable(self):
+ return True
+
+ def seekable(self):
+ return True
+
+ def write(self, b):
+ self.lst.append(b)
+
+ def tell(self):
+ return len(self.lst)
+
def tostringlist(element, encoding=None, method=None):
- class dummy:
- pass
- data = []
- file = dummy()
- file.write = data.append
- ElementTree(element).write(file, encoding, method=method)
- # FIXME: merge small fragments into larger parts
- return data
+ lst = []
+ stream = _ListDataStream(lst)
+ ElementTree(element).write(stream, encoding, method=method)
+ return lst
##
# Writes an element tree or element structure to sys.stdout. This
@@ -1510,24 +1531,30 @@ class XMLParser:
self.target = self._target = target
self._error = expat.error
self._names = {} # name memo cache
- # callbacks
+ # main callbacks
parser.DefaultHandlerExpand = self._default
- parser.StartElementHandler = self._start
- parser.EndElementHandler = self._end
- parser.CharacterDataHandler = self._data
- # optional callbacks
- parser.CommentHandler = self._comment
- parser.ProcessingInstructionHandler = self._pi
+ if hasattr(target, 'start'):
+ parser.StartElementHandler = self._start
+ if hasattr(target, 'end'):
+ parser.EndElementHandler = self._end
+ if hasattr(target, 'data'):
+ parser.CharacterDataHandler = target.data
+ # miscellaneous callbacks
+ if hasattr(target, 'comment'):
+ parser.CommentHandler = target.comment
+ if hasattr(target, 'pi'):
+ parser.ProcessingInstructionHandler = target.pi
# let expat do the buffering, if supported
try:
- self._parser.buffer_text = 1
+ parser.buffer_text = 1
except AttributeError:
pass
# use new-style attribute handling, if supported
try:
- self._parser.ordered_attributes = 1
- self._parser.specified_attributes = 1
- parser.StartElementHandler = self._start_list
+ parser.ordered_attributes = 1
+ parser.specified_attributes = 1
+ if hasattr(target, 'start'):
+ parser.StartElementHandler = self._start_list
except AttributeError:
pass
self._doctype = None
@@ -1571,44 +1598,29 @@ class XMLParser:
attrib[fixname(attrib_in[i])] = attrib_in[i+1]
return self.target.start(tag, attrib)
- def _data(self, text):
- return self.target.data(text)
-
def _end(self, tag):
return self.target.end(self._fixname(tag))
- def _comment(self, data):
- try:
- comment = self.target.comment
- except AttributeError:
- pass
- else:
- return comment(data)
-
- def _pi(self, target, data):
- try:
- pi = self.target.pi
- except AttributeError:
- pass
- else:
- return pi(target, data)
-
def _default(self, text):
prefix = text[:1]
if prefix == "&":
# deal with undefined entities
try:
- self.target.data(self.entity[text[1:-1]])
+ data_handler = self.target.data
+ except AttributeError:
+ return
+ try:
+ data_handler(self.entity[text[1:-1]])
except KeyError:
from xml.parsers import expat
err = expat.error(
"undefined entity %s: line %d, column %d" %
- (text, self._parser.ErrorLineNumber,
- self._parser.ErrorColumnNumber)
+ (text, self.parser.ErrorLineNumber,
+ self.parser.ErrorColumnNumber)
)
err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
- err.lineno = self._parser.ErrorLineNumber
- err.offset = self._parser.ErrorColumnNumber
+ err.lineno = self.parser.ErrorLineNumber
+ err.offset = self.parser.ErrorColumnNumber
raise err
elif prefix == "<" and text[:9] == "<!DOCTYPE":
self._doctype = [] # inside a doctype declaration
@@ -1626,16 +1638,16 @@ class XMLParser:
type = self._doctype[1]
if type == "PUBLIC" and n == 4:
name, type, pubid, system = self._doctype
+ if pubid:
+ pubid = pubid[1:-1]
elif type == "SYSTEM" and n == 3:
name, type, system = self._doctype
pubid = None
else:
return
- if pubid:
- pubid = pubid[1:-1]
if hasattr(self.target, "doctype"):
self.target.doctype(name, pubid, system[1:-1])
- elif self.doctype is not self._XMLParser__doctype:
+ elif self.doctype != self._XMLParser__doctype:
# warn about deprecated call
self._XMLParser__doctype(name, pubid, system[1:-1])
self.doctype(name, pubid, system[1:-1])
@@ -1666,7 +1678,7 @@ class XMLParser:
def feed(self, data):
try:
- self._parser.Parse(data, 0)
+ self.parser.Parse(data, 0)
except self._error as v:
self._raiseerror(v)
@@ -1678,12 +1690,100 @@ class XMLParser:
def close(self):
try:
- self._parser.Parse("", 1) # end of data
+ self.parser.Parse("", 1) # end of data
except self._error as v:
self._raiseerror(v)
- tree = self.target.close()
- del self.target, self._parser # get rid of circular references
- return tree
+ try:
+ close_handler = self.target.close
+ except AttributeError:
+ pass
+ else:
+ return close_handler()
+ finally:
+ # get rid of circular references
+ del self.parser, self._parser
+ del self.target, self._target
+
+
+# Import the C accelerators
+try:
+ # Element, SubElement, ParseError, TreeBuilder, XMLParser
+ from _elementtree import *
+except ImportError:
+ pass
+else:
+ # Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser
+
+ class ElementTree(ElementTree):
+ def parse(self, source, parser=None):
+ close_source = False
+ if not hasattr(source, 'read'):
+ source = open(source, 'rb')
+ close_source = True
+ try:
+ if parser is not None:
+ while True:
+ data = source.read(65536)
+ if not data:
+ break
+ parser.feed(data)
+ self._root = parser.close()
+ else:
+ parser = XMLParser()
+ self._root = parser._parse(source)
+ return self._root
+ finally:
+ if close_source:
+ source.close()
+
+ class iterparse:
+ root = None
+ def __init__(self, file, events=None):
+ self._close_file = False
+ if not hasattr(file, 'read'):
+ file = open(file, 'rb')
+ self._close_file = True
+ self._file = file
+ self._events = []
+ self._index = 0
+ self._error = None
+ self.root = self._root = None
+ b = TreeBuilder()
+ self._parser = XMLParser(b)
+ self._parser._setevents(self._events, events)
+
+ def __next__(self):
+ while True:
+ try:
+ item = self._events[self._index]
+ self._index += 1
+ return item
+ except IndexError:
+ pass
+ if self._error:
+ e = self._error
+ self._error = None
+ raise e
+ if self._parser is None:
+ self.root = self._root
+ if self._close_file:
+ self._file.close()
+ raise StopIteration
+ # load event buffer
+ del self._events[:]
+ self._index = 0
+ data = self._file.read(16384)
+ if data:
+ try:
+ self._parser.feed(data)
+ except SyntaxError as exc:
+ self._error = exc
+ else:
+ self._root = self._parser.close()
+ self._parser = None
+
+ def __iter__(self):
+ return self
# compatibility
XMLTreeBuilder = XMLParser
diff --git a/Lib/xml/etree/cElementTree.py b/Lib/xml/etree/cElementTree.py
index a6f127a..368e679 100644
--- a/Lib/xml/etree/cElementTree.py
+++ b/Lib/xml/etree/cElementTree.py
@@ -1,3 +1,3 @@
-# Wrapper module for _elementtree
+# Deprecated alias for xml.etree.ElementTree
-from _elementtree import *
+from xml.etree.ElementTree import *
diff --git a/Lib/xml/parsers/expat.py b/Lib/xml/parsers/expat.py
index a805b82..bcbe9fb 100644
--- a/Lib/xml/parsers/expat.py
+++ b/Lib/xml/parsers/expat.py
@@ -1,6 +1,4 @@
"""Interface to the Expat non-validating XML parser."""
-__version__ = '$Revision$'
-
import sys
from pyexpat import *