summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Neal Norwitz <nnorwitz@gmail.com> 2005-09-30 04:58:23 (GMT)
committer: Neal Norwitz <nnorwitz@gmail.com> 2005-09-30 04:58:23 (GMT)
commit: bc3710f83d52e6cdd91b779a06fdb777b62f322d (patch)
tree: f1107d9e0ad9d155a1fa8e9be7d70a585872e68e
parent: 2c89f1a4f0ef21ffc4c0b603ad42b7eddcac5a7b (diff)
download: cpython-bc3710f83d52e6cdd91b779a06fdb777b62f322d.zip
cpython-bc3710f83d52e6cdd91b779a06fdb777b62f322d.tar.gz
cpython-bc3710f83d52e6cdd91b779a06fdb777b62f322d.tar.bz2
- Patch #1309009: Fix a segfault in pyexpat when the XML document is encoded
in Latin-1 but Python incorrectly assumes it is encoded in UTF-8.
-rw-r--r-- Lib/test/test_minidom.py 9
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 5
-rw-r--r-- Modules/pyexpat.c 7
4 files changed, 20 insertions, 2 deletions
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index 3154fbf..8b4c715 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -889,6 +889,15 @@ def testEncodings():
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
"testEncodings - encoding EURO SIGN")
+
+ # Verify that character decoding errors throw exceptions instead of crashing
+ try:
+ doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
+ except UnicodeDecodeError:
+ pass
+ else:
+ print 'parsing with bad encoding should raise a UnicodeDecodeError'
+
doc.unlink()
class UserDataHandler:
diff --git a/Misc/ACKS b/Misc/ACKS
index c599172..0fdb79f 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -303,6 +303,7 @@ Flemming Kjær Jensen
Jiba
Orjan Johansen
Simon Johnston
+Evan Jones
Richard Jones
Irmen de Jong
Lucas de Jonge
diff --git a/Misc/NEWS b/Misc/NEWS
index c2b32d8..47c0ea4 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -4,7 +4,7 @@ Python News
(editors: check NEWS.help for information about editing NEWS using ReST.)
-What's New in Python 2.4.3a0?
+What's New in Python 2.4.3c1?
=============================
*Release date: XX-XX-200X*
@@ -12,6 +12,9 @@ What's New in Python 2.4.3a0?
Extension Modules
-----------------
+- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1,
+ but Python incorrectly assumes it is in UTF-8 format
+
- Fix parse errors in the readline module when compiling without threads.
Library
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index e6c14f8..438f760 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str)
{
PyObject *result = STRING_CONV_FUNC(str);
PyObject *value;
+ /* result can be NULL if the unicode conversion failed. */
+ if (!result)
+ return result;
if (!self->intern)
return result;
value = PyDict_GetItem(self->intern, result);
@@ -572,7 +575,9 @@ my_StartElementHandler(void *userData,
Py_DECREF(v);
}
}
- args = Py_BuildValue("(NN)", string_intern(self, name), container);
+ args = string_intern(self, name);
+ if (args != NULL)
+ args = Py_BuildValue("(NN)", args, container);
if (args == NULL) {
Py_DECREF(container);
return;