summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2019-04-14 08:09:09 (GMT)
committer GitHub <noreply@github.com> 2019-04-14 08:09:09 (GMT)
commit e9927e1820caea01e576141d9a623ea394d43dad (patch)
tree 4f758ae025e9b5c1bb6198bb4524571da118489f
parent ffca16e25a70fd44a87b13b379b5ec0c7a11e926 (diff)
download cpython-e9927e1820caea01e576141d9a623ea394d43dad.zip
cpython-e9927e1820caea01e576141d9a623ea394d43dad.tar.gz
cpython-e9927e1820caea01e576141d9a623ea394d43dad.tar.bz2
bpo-30485: support a default prefix mapping in ElementPath by passing None as prefix (#1823)
-rw-r--r-- Doc/library/xml.etree.elementtree.rst 9
-rw-r--r-- Lib/test/test_xml_etree.py 6
-rw-r--r-- Lib/xml/etree/ElementPath.py 33
-rw-r--r-- Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst 3
4 files changed, 39 insertions, 12 deletions
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 9bee0ea..c83e719 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -764,7 +764,8 @@ Element Objects
Finds the first subelement matching *match*. *match* may be a tag name
or a :ref:`path <elementtree-xpath>`. Returns an element instance
or ``None``. *namespaces* is an optional mapping from namespace prefix
- to full name.
+ to full name. Pass ``None`` as prefix to move all unprefixed tag names
+ in the expression into the given namespace.
.. method:: findall(match, namespaces=None)
@@ -772,7 +773,8 @@ Element Objects
Finds all matching subelements, by tag name or
:ref:`path <elementtree-xpath>`. Returns a list containing all matching
elements in document order. *namespaces* is an optional mapping from
- namespace prefix to full name.
+ namespace prefix to full name. Pass ``None`` as prefix to move all
+ unprefixed tag names in the expression into the given namespace.
.. method:: findtext(match, default=None, namespaces=None)
@@ -782,7 +784,8 @@ Element Objects
of the first matching element, or *default* if no element was found.
Note that if the matching element has no text content an empty string
is returned. *namespaces* is an optional mapping from namespace prefix
- to full name.
+ to full name. Pass ``None`` as prefix to move all unprefixed tag names
+ in the expression into the given namespace.
.. method:: getchildren()
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index bdcd4e0..2f7a3b6 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2463,6 +2463,12 @@ class ElementFindTest(unittest.TestCase):
nsmap = {'xx': 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
+ nsmap = {'xx': 'X', None: 'Y'}
+ self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
+ self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
+ nsmap = {'xx': 'X', '': 'Y'}
+ with self.assertRaisesRegex(ValueError, 'namespace prefix'):
+ root.findall(".//xx:b", namespaces=nsmap)
def test_bad_find(self):
e = ET.XML(SAMPLE_XML)
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index ef32917..0e3854f 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -71,16 +71,22 @@ xpath_tokenizer_re = re.compile(
)
def xpath_tokenizer(pattern, namespaces=None):
+ default_namespace = namespaces.get(None) if namespaces else None
for token in xpath_tokenizer_re.findall(pattern):
tag = token[1]
- if tag and tag[0] != "{" and ":" in tag:
- try:
+ if tag and tag[0] != "{":
+ if ":" in tag:
prefix, uri = tag.split(":", 1)
- if not namespaces:
- raise KeyError
- yield token[0], "{%s}%s" % (namespaces[prefix], uri)
- except KeyError:
- raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
+ try:
+ if not namespaces:
+ raise KeyError
+ yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+ except KeyError:
+ raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
+ elif default_namespace:
+ yield token[0], "{%s}%s" % (default_namespace, tag)
+ else:
+ yield token
else:
yield token
@@ -264,10 +270,19 @@ class _SelectorContext:
def iterfind(elem, path, namespaces=None):
# compile selector pattern
- cache_key = (path, None if namespaces is None
- else tuple(sorted(namespaces.items())))
if path[-1:] == "/":
path = path + "*" # implicit all (FIXME: keep this?)
+
+ cache_key = (path,)
+ if namespaces:
+ if '' in namespaces:
+ raise ValueError("empty namespace prefix must be passed as None, not the empty string")
+ if None in namespaces:
+ cache_key += (namespaces[None],) + tuple(sorted(
+ item for item in namespaces.items() if item[0] is not None))
+ else:
+ cache_key += tuple(sorted(namespaces.items()))
+
try:
selector = _cache[cache_key]
except KeyError:
diff --git a/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst b/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst
new file mode 100644
index 0000000..6c82efd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst
@@ -0,0 +1,3 @@
+Path expressions in xml.etree.ElementTree can now avoid explicit namespace
+prefixes for tags (or the "{namespace}tag" notation) by passing a default
+namespace with a 'None' prefix.