From 4894c306266b5fb3a6cf8429cbb6bc31d3e23e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Walter=20D=C3=B6rwald?= Date: Fri, 24 Oct 2003 14:25:28 +0000 Subject: Fix a bug in the memory reallocation code of PyUnicode_TranslateCharmap(). charmaptranslate_makespace() allocated more memory than required for the next replacement but didn't remember that fact, so memory size was growing exponentially every time a replacement string is longer than one character. This fixes SF bug #828737. --- Lib/test/test_codeccallbacks.py | 12 ++++++++++++ Objects/unicodeobject.c | 39 ++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index ae75229..289e838 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -690,6 +690,18 @@ class CodecCallbackTest(unittest.TestCase): self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1}) self.assertRaises(TypeError, u"\xff".translate, {0xff: ()}) + def test_bug828737(self): + charmap = { + ord("&"): u"&amp;", + ord("<"): u"&lt;", + ord(">"): u"&gt;", + ord('"'): u"&quot;", + } + + for n in (1, 10, 100, 1000): + text = u'abc<def>ghi'*n + text.translate(charmap) + def test_main(): test.test_support.run_unittest(CodecCallbackTest) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 03559da..e4fe531 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3222,19 +3222,19 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) if not reallocate and adjust various state variables. 
Return 0 on success, -1 on error */ static -int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize, +int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int requiredsize) { - if (requiredsize > *outsize) { + int oldsize = PyUnicode_GET_SIZE(*outobj); + if (requiredsize > oldsize) { /* remember old output position */ int outpos = *outp-PyUnicode_AS_UNICODE(*outobj); /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2 * *outsize) - requiredsize = 2 * *outsize; + if (requiredsize < 2 * oldsize) + requiredsize = 2 * oldsize; if (_PyUnicode_Resize(outobj, requiredsize) < 0) return -1; *outp = PyUnicode_AS_UNICODE(*outobj) + outpos; - *outsize = requiredsize; } return 0; } @@ -3245,14 +3245,15 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsiz The caller must decref result. Return 0 on success, -1 on error. */ static -int charmaptranslate_output(Py_UNICODE c, PyObject *mapping, - PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res) +int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp, + int insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp, + PyObject **res) { - if (charmaptranslate_lookup(c, mapping, res)) + if (charmaptranslate_lookup(*curinp, mapping, res)) return -1; if (*res==NULL) { /* not found => default to 1:1 mapping */ - *(*outp)++ = (Py_UNICODE)c; + *(*outp)++ = *curinp; } else if (*res==Py_None) ; @@ -3268,8 +3269,10 @@ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping, } else if (repsize!=0) { /* more than one character */ - int requiredsize = *outsize + repsize - 1; - if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize)) + int requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) + + (insize - (*curinp-*startinp)) + + repsize - 1; + if (charmaptranslate_makespace(outobj, outp, requiredsize)) return -1; memcpy(*outp, PyUnicode_AS_UNICODE(*res), 
sizeof(Py_UNICODE)*repsize); *outp += repsize; @@ -3294,7 +3297,6 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, Py_UNICODE *str; /* current output position */ int respos = 0; - int ressize; char *reason = "character maps to <undefined>"; PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -3312,16 +3314,15 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, replacements, if we need more, we'll resize */ res = PyUnicode_FromUnicode(NULL, size); if (res == NULL) - goto onError; + goto onError; if (size == 0) return res; str = PyUnicode_AS_UNICODE(res); - ressize = size; while (p