From bc3710f83d52e6cdd91b779a06fdb777b62f322d Mon Sep 17 00:00:00 2001 From: Neal Norwitz Date: Fri, 30 Sep 2005 04:58:23 +0000 Subject: - Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format --- Lib/test/test_minidom.py | 9 +++++++++ Misc/ACKS | 1 + Misc/NEWS | 5 ++++- Modules/pyexpat.c | 7 ++++++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 3154fbf..8b4c715 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -889,6 +889,15 @@ def testEncodings(): and doc.toxml('utf-8') == '\xe2\x82\xac' and doc.toxml('iso-8859-15') == '\xa4', "testEncodings - encoding EURO SIGN") + + # Verify that character decoding errors throw exceptions instead of crashing + try: + doc = parseString('Comment \xe7a va ? Tr\xe8s bien ?') + except UnicodeDecodeError: + pass + else: + print 'parsing with bad encoding should raise a UnicodeDecodeError' + doc.unlink() class UserDataHandler: diff --git a/Misc/ACKS b/Misc/ACKS index c599172..0fdb79f 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -303,6 +303,7 @@ Flemming Kj Jiba Orjan Johansen Simon Johnston +Evan Jones Richard Jones Irmen de Jong Lucas de Jonge diff --git a/Misc/NEWS b/Misc/NEWS index c2b32d8..47c0ea4 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -4,7 +4,7 @@ Python News (editors: check NEWS.help for information about editing NEWS using ReST.) -What's New in Python 2.4.3a0? +What's New in Python 2.4.3c1? ============================= *Release date: XX-XX-200X* @@ -12,6 +12,9 @@ What's New in Python 2.4.3a0? Extension Modules ----------------- +- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, + but Python incorrectly assumes it is in UTF-8 format + - Fix parse errors in the readline module when compiling without threads. Library diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index e6c14f8..438f760 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str) { PyObject *result = STRING_CONV_FUNC(str); PyObject *value; + /* result can be NULL if the unicode conversion failed. */ + if (!result) + return result; if (!self->intern) return result; value = PyDict_GetItem(self->intern, result); @@ -572,7 +575,9 @@ my_StartElementHandler(void *userData, Py_DECREF(v); } } - args = Py_BuildValue("(NN)", string_intern(self, name), container); + args = string_intern(self, name); + if (args != NULL) + args = Py_BuildValue("(NN)", args, container); if (args == NULL) { Py_DECREF(container); return; -- cgit v0.12