diff options
author | Sjoerd Mullender <sjoerd@acm.org> | 2000-07-10 08:09:48 (GMT) |
---|---|---|
committer | Sjoerd Mullender <sjoerd@acm.org> | 2000-07-10 08:09:48 (GMT) |
commit | f98506478122c3c3002e91e5562520b02d7bd132 (patch) | |
tree | 941ad31eb57dd429a6a38b45ea2089da516667c4 /Lib | |
parent | b16714b4d0657829d4050058b240f0390c61018f (diff) | |
download | cpython-f98506478122c3c3002e91e5562520b02d7bd132.zip cpython-f98506478122c3c3002e91e5562520b02d7bd132.tar.gz cpython-f98506478122c3c3002e91e5562520b02d7bd132.tar.bz2 |
Better error handling of bad entity references. Previously, when an & in
an attribute value was not escaped, you could get two syntax errors:
one about a missing semicolon and one about an unknown entity. Now
you get only one, about a bogus ampersand.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/xmllib.py | 41 |
1 files changed, 25 insertions, 16 deletions
diff --git a/Lib/xmllib.py b/Lib/xmllib.py
index 024e7f8..bbd4b54 100644
--- a/Lib/xmllib.py
+++ b/Lib/xmllib.py
@@ -181,37 +181,46 @@ class XMLParser:
             res = amp.search(data, i)
             if res is None:
                 return data
-            res = ref.match(data, res.start(0))
+            s = res.start(0)
+            res = ref.match(data, s)
             if res is None:
                 self.syntax_error("bogus `&'")
-                i =i+1
+                i = s+1
                 continue
             i = res.end(0)
-            if data[i - 1] != ';':
-                self.syntax_error("`;' missing after entity/char reference")
-                i = i-1
             str = res.group(1)
-            pre = data[:res.start(0)]
-            post = data[i:]
+            rescan = 0
             if str[0] == '#':
                 if str[1] == 'x':
                     str = chr(string.atoi(str[2:], 16))
                 else:
                     str = chr(string.atoi(str[1:]))
-                data = pre + str + post
-                i = res.start(0)+len(str)
+                if data[i - 1] != ';':
+                    self.syntax_error("`;' missing after char reference")
+                    i = i-1
             elif all:
                 if self.entitydefs.has_key(str):
-                    data = pre + self.entitydefs[str] + post
-                    i = res.start(0) # rescan substituted text
+                    str = self.entitydefs[str]
+                    rescan = 1
+                elif data[i - 1] != ';':
+                    self.syntax_error("bogus `&'")
+                    i = s + 1 # just past the &
+                    continue
                 else:
                     self.syntax_error("reference to unknown entity `&%s;'" % str)
-                    # can't do it, so keep the entity ref in
-                    data = pre + '&' + str + ';' + post
-                    i = res.start(0) + len(str) + 2
+                    str = '&' + str + ';'
+            elif data[i - 1] != ';':
+                self.syntax_error("bogus `&'")
+                i = s + 1 # just past the &
+                continue
+
+            # when we get here, str contains the translated text and i points
+            # to the end of the string that is to be replaced
+            data = data[:s] + str + data[i:]
+            if rescan:
+                i = s
             else:
-                # just translating character references
-                pass # i is already postioned correctly
+                i = s + len(str)
 
     # Internal -- handle data as far as reasonable. May leave state
     # and data to be processed by a subsequent call. If 'end' is