summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sjoerd Mullender <sjoerd@acm.org>, 2000-07-10 08:09:48 (GMT)
committer: Sjoerd Mullender <sjoerd@acm.org>, 2000-07-10 08:09:48 (GMT)
commit: f98506478122c3c3002e91e5562520b02d7bd132 (patch)
tree: 941ad31eb57dd429a6a38b45ea2089da516667c4
parent: b16714b4d0657829d4050058b240f0390c61018f (diff)
download: cpython-f98506478122c3c3002e91e5562520b02d7bd132.zip
cpython-f98506478122c3c3002e91e5562520b02d7bd132.tar.gz
cpython-f98506478122c3c3002e91e5562520b02d7bd132.tar.bz2
Better error handling of bad entity references. Previously, when an & in
an attribute value was not escaped, you could get two syntax errors: one about a missing semicolon and one about an unknown entity. Now you get only one, about a bogus ampersand.
-rw-r--r-- Lib/xmllib.py 41
1 files changed, 25 insertions, 16 deletions
diff --git a/Lib/xmllib.py b/Lib/xmllib.py
index 024e7f8..bbd4b54 100644
--- a/Lib/xmllib.py
+++ b/Lib/xmllib.py
@@ -181,37 +181,46 @@ class XMLParser:
res = amp.search(data, i)
if res is None:
return data
- res = ref.match(data, res.start(0))
+ s = res.start(0)
+ res = ref.match(data, s)
if res is None:
self.syntax_error("bogus `&'")
- i =i+1
+ i = s+1
continue
i = res.end(0)
- if data[i - 1] != ';':
- self.syntax_error("`;' missing after entity/char reference")
- i = i-1
str = res.group(1)
- pre = data[:res.start(0)]
- post = data[i:]
+ rescan = 0
if str[0] == '#':
if str[1] == 'x':
str = chr(string.atoi(str[2:], 16))
else:
str = chr(string.atoi(str[1:]))
- data = pre + str + post
- i = res.start(0)+len(str)
+ if data[i - 1] != ';':
+ self.syntax_error("`;' missing after char reference")
+ i = i-1
elif all:
if self.entitydefs.has_key(str):
- data = pre + self.entitydefs[str] + post
- i = res.start(0) # rescan substituted text
+ str = self.entitydefs[str]
+ rescan = 1
+ elif data[i - 1] != ';':
+ self.syntax_error("bogus `&'")
+ i = s + 1 # just past the &
+ continue
else:
self.syntax_error("reference to unknown entity `&%s;'" % str)
- # can't do it, so keep the entity ref in
- data = pre + '&' + str + ';' + post
- i = res.start(0) + len(str) + 2
+ str = '&' + str + ';'
+ elif data[i - 1] != ';':
+ self.syntax_error("bogus `&'")
+ i = s + 1 # just past the &
+ continue
+
+ # when we get here, str contains the translated text and i points
+ # to the end of the string that is to be replaced
+ data = data[:s] + str + data[i:]
+ if rescan:
+ i = s
else:
- # just translating character references
- pass # i is already postioned correctly
+ i = s + len(str)
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is