From 0374a82f75b6929eb334d64e974926402e36c6a8 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Thu, 9 Apr 2009 21:54:50 +0000 Subject: Issue #2170: refactored xml.dom.minidom.normalize, increasing both its clarity and its speed. --- Lib/test/test_minidom.py | 161 +++++++++++++++++++++++++++++++++++++++++++++++ Lib/xml/dom/minidom.py | 29 ++++----- Misc/NEWS | 3 + 3 files changed, 175 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index e56d512..c2f6743 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -790,6 +790,167 @@ class MinidomTest(unittest.TestCase): "testNormalize -- single empty node removed") doc.unlink() + def testNormalizeCombineAndNextSibling(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("first")) + root.appendChild(doc.createTextNode("second")) + root.appendChild(doc.createElement("i")) + self.confirm(len(root.childNodes) == 3 + and root.childNodes.length == 3, + "testNormalizeCombineAndNextSibling -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2 + and root.firstChild.data == "firstsecond" + and root.firstChild is not root.lastChild + and root.firstChild.nextSibling is root.lastChild + and root.firstChild.previousSibling is None + and root.lastChild.previousSibling is root.firstChild + and root.lastChild.nextSibling is None + , "testNormalizeCombinedAndNextSibling -- result") + doc.unlink() + + def testNormalizeDeleteWithPrevSibling(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("first")) + root.appendChild(doc.createTextNode("")) + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2, + "testNormalizeDeleteWithPrevSibling -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild.data == "first" + and root.firstChild is root.lastChild + and root.firstChild.nextSibling is None + and root.firstChild.previousSibling is None + , "testNormalizeDeleteWithPrevSibling -- result") + doc.unlink() + + def testNormalizeDeleteWithNextSibling(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createTextNode("second")) + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2, + "testNormalizeDeleteWithNextSibling -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild.data == "second" + and root.firstChild is root.lastChild + and root.firstChild.nextSibling is None + and root.firstChild.previousSibling is None + , "testNormalizeDeleteWithNextSibling -- result") + doc.unlink() + + def testNormalizeDeleteWithTwoNonTextSiblings(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createElement("i")) + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createElement("i")) + self.confirm(len(root.childNodes) == 3 + and root.childNodes.length == 3, + "testNormalizeDeleteWithTwoSiblings -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2 + and root.firstChild is not root.lastChild + and root.firstChild.nextSibling is root.lastChild + and root.firstChild.previousSibling is None + and root.lastChild.previousSibling is root.firstChild + and root.lastChild.nextSibling is None + , "testNormalizeDeleteWithTwoSiblings -- result") + doc.unlink() + + def testNormalizeDeleteAndCombine(self): + doc = parseString("") + root = doc.documentElement + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createTextNode("second")) + root.appendChild(doc.createTextNode("")) + root.appendChild(doc.createTextNode("fourth")) + root.appendChild(doc.createTextNode("")) + self.confirm(len(root.childNodes) == 5 + and root.childNodes.length == 5, + "testNormalizeDeleteAndCombine -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 1 + and root.childNodes.length == 1 + and root.firstChild is root.lastChild + and root.firstChild.data == "secondfourth" + and root.firstChild.previousSibling is None + and root.firstChild.nextSibling is None + , "testNormalizeDeleteAndCombine -- result") + doc.unlink() + + def testNormalizeRecursion(self): + doc = parseString("" + "" + "" + "t" + # + #x + "" + "" + "" + "t2" + #x2 + "" + "t3" + #x3 + "" + # + "") + root = doc.documentElement + root.childNodes[0].appendChild(doc.createTextNode("")) + root.childNodes[0].appendChild(doc.createTextNode("x")) + root.childNodes[1].childNodes[0].appendChild(doc.createTextNode("x2")) + root.childNodes[1].appendChild(doc.createTextNode("x3")) + root.appendChild(doc.createTextNode("")) + self.confirm(len(root.childNodes) == 3 + and root.childNodes.length == 3 + and len(root.childNodes[0].childNodes) == 4 + and root.childNodes[0].childNodes.length == 4 + and len(root.childNodes[1].childNodes) == 3 + and root.childNodes[1].childNodes.length == 3 + and len(root.childNodes[1].childNodes[0].childNodes) == 2 + and root.childNodes[1].childNodes[0].childNodes.length == 2 + , "testNormalize2 -- preparation") + doc.normalize() + self.confirm(len(root.childNodes) == 2 + and root.childNodes.length == 2 + and len(root.childNodes[0].childNodes) == 2 + and root.childNodes[0].childNodes.length == 2 + and len(root.childNodes[1].childNodes) == 2 + and root.childNodes[1].childNodes.length == 2 + and len(root.childNodes[1].childNodes[0].childNodes) == 1 + and root.childNodes[1].childNodes[0].childNodes.length == 1 + , "testNormalize2 -- childNodes lengths") + self.confirm(root.childNodes[0].childNodes[1].data == "tx" + and root.childNodes[1].childNodes[0].childNodes[0].data == "t2x2" + and root.childNodes[1].childNodes[1].data == "t3x3" + , "testNormalize2 -- joined text fields") + self.confirm(root.childNodes[0].childNodes[1].nextSibling is None + and root.childNodes[0].childNodes[1].previousSibling + is root.childNodes[0].childNodes[0] + and root.childNodes[0].childNodes[0].previousSibling is None + and root.childNodes[0].childNodes[0].nextSibling + is root.childNodes[0].childNodes[1] + and root.childNodes[1].childNodes[1].nextSibling is None + and root.childNodes[1].childNodes[1].previousSibling + is root.childNodes[1].childNodes[0] + and root.childNodes[1].childNodes[0].previousSibling is None + and root.childNodes[1].childNodes[0].nextSibling + is root.childNodes[1].childNodes[1] + , "testNormalize2 -- sibling pointers") + doc.unlink() + + def testBug1433694(self): doc = parseString("t") node = doc.documentElement diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index ad42947..b8bd451 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -177,34 +177,27 @@ class Node(xml.dom.Node): L = [] for child in self.childNodes: if child.nodeType == Node.TEXT_NODE: - data = child.data - if data and L and L[-1].nodeType == child.nodeType: + if not child.data: + # empty text node; discard + if L: + L[-1].nextSibling = child.nextSibling + if child.nextSibling: + child.nextSibling.previousSibling = child.previousSibling + child.unlink() + elif L and L[-1].nodeType == child.nodeType: # collapse text node node = L[-1] node.data = node.data + child.data node.nextSibling = child.nextSibling + if child.nextSibling: + child.nextSibling.previousSibling = node child.unlink() - elif data: - if L: - L[-1].nextSibling = child - child.previousSibling = L[-1] - else: - child.previousSibling = None - L.append(child) else: - # empty text node; discard - child.unlink() + L.append(child) else: - if L: - L[-1].nextSibling = child - child.previousSibling = L[-1] - else: - child.previousSibling = None L.append(child) if child.nodeType == Node.ELEMENT_NODE: child.normalize() - if L: - L[-1].nextSibling = None self.childNodes[:] = L def cloneNode(self, deep): diff --git a/Misc/NEWS b/Misc/NEWS index 9acbea2..e393f2e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1 Core and Builtins ----------------- +- Issue #2170: refactored xml.dom.minidom.normalize, increasing both + its clarity and its speed. + - Issue #2396: the memoryview object was backported from Python 3.1. - Fix a problem in PyErr_NormalizeException that leads to "undetected errors" -- cgit v0.12