summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2004-04-17 19:36:48 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2004-04-17 19:36:48 (GMT)
commit: 61e40bd897da8ab4bf2dffe817d0163e984c1e40 (patch)
tree: c8500cc600b30c400873e3c6c0a838d43b9d83fd
parent: e5fced781bbf892a498bd2422b49d9ac1a2d6352 (diff)
download: cpython-61e40bd897da8ab4bf2dffe817d0163e984c1e40.zip
cpython-61e40bd897da8ab4bf2dffe817d0163e984c1e40.tar.gz
cpython-61e40bd897da8ab4bf2dffe817d0163e984c1e40.tar.bz2
Special case normalization of empty strings. Fixes #924361.
Backported to 2.3.
-rw-r--r-- Lib/test/test_unicodedata.py 1
-rw-r--r-- Modules/unicodedata.c 7
2 files changed, 8 insertions, 0 deletions
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 61b4ffb..8157fb3 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -170,6 +170,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
def test_normalize(self):
self.assertRaises(TypeError, self.db.normalize)
self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
+ self.assertEqual(self.db.normalize('NFKC', u''), u'')
# The rest can be found in test_normalization.py
# which requires an external file.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 311db29..ba218a3 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -515,6 +515,13 @@ unicodedata_normalize(PyObject *self, PyObject *args)
&form, &PyUnicode_Type, &input))
return NULL;
+ if (PyUnicode_GetSize(input) == 0) {
+ /* Special case empty input strings, since resizing
+ them later would cause internal errors. */
+ Py_INCREF(input);
+ return input;
+ }
+
if (strcmp(form, "NFC") == 0)
return nfc_nfkc(input, 0);
if (strcmp(form, "NFKC") == 0)