summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2003-10-24 14:25:28 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2003-10-24 14:25:28 (GMT)
commit: 4894c306266b5fb3a6cf8429cbb6bc31d3e23e4e (patch)
tree: cb49811c63b1b476a07c19af8d8ab6f5d84ab3aa
parent: 6a5b02774284c20d6860edc16157cb99a0c0b3ca (diff)
downloadcpython-4894c306266b5fb3a6cf8429cbb6bc31d3e23e4e.zip
cpython-4894c306266b5fb3a6cf8429cbb6bc31d3e23e4e.tar.gz
cpython-4894c306266b5fb3a6cf8429cbb6bc31d3e23e4e.tar.bz2
Fix a bug in the memory reallocation code of PyUnicode_TranslateCharmap().
charmaptranslate_makespace() allocated more memory than required for the next replacement but didn't remember that fact, so memory size was growing exponentially every time a replacement string was longer than one character. This fixes SF bug #828737.
-rw-r--r-- Lib/test/test_codeccallbacks.py 12
-rw-r--r-- Objects/unicodeobject.c 39
2 files changed, 32 insertions, 19 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index ae75229..289e838 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -690,6 +690,18 @@ class CodecCallbackTest(unittest.TestCase):
self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
+ def test_bug828737(self):
+ charmap = {
+ ord("&"): u"&amp;",
+ ord("<"): u"&lt;",
+ ord(">"): u"&gt;",
+ ord('"'): u"&quot;",
+ }
+
+ for n in (1, 10, 100, 1000):
+ text = u'abc<def>ghi'*n
+ text.translate(charmap)
+
def test_main():
test.test_support.run_unittest(CodecCallbackTest)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 03559da..e4fe531 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3222,19 +3222,19 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
if not reallocate and adjust various state variables.
Return 0 on success, -1 on error */
static
-int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize,
+int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
int requiredsize)
{
- if (requiredsize > *outsize) {
+ int oldsize = PyUnicode_GET_SIZE(*outobj);
+ if (requiredsize > oldsize) {
/* remember old output position */
int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
/* exponentially overallocate to minimize reallocations */
- if (requiredsize < 2 * *outsize)
- requiredsize = 2 * *outsize;
+ if (requiredsize < 2 * oldsize)
+ requiredsize = 2 * oldsize;
if (_PyUnicode_Resize(outobj, requiredsize) < 0)
return -1;
*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
- *outsize = requiredsize;
}
return 0;
}
@@ -3245,14 +3245,15 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsiz
The called must decref result.
Return 0 on success, -1 on error. */
static
-int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
- PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res)
+int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
+ int insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
+ PyObject **res)
{
- if (charmaptranslate_lookup(c, mapping, res))
+ if (charmaptranslate_lookup(*curinp, mapping, res))
return -1;
if (*res==NULL) {
/* not found => default to 1:1 mapping */
- *(*outp)++ = (Py_UNICODE)c;
+ *(*outp)++ = *curinp;
}
else if (*res==Py_None)
;
@@ -3268,8 +3269,10 @@ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
}
else if (repsize!=0) {
/* more than one character */
- int requiredsize = *outsize + repsize - 1;
- if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize))
+ int requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
+ (insize - (*curinp-*startinp)) +
+ repsize - 1;
+ if (charmaptranslate_makespace(outobj, outp, requiredsize))
return -1;
memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
*outp += repsize;
@@ -3294,7 +3297,6 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
Py_UNICODE *str;
/* current output position */
int respos = 0;
- int ressize;
char *reason = "character maps to <undefined>";
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
@@ -3312,16 +3314,15 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
replacements, if we need more, we'll resize */
res = PyUnicode_FromUnicode(NULL, size);
if (res == NULL)
- goto onError;
+ goto onError;
if (size == 0)
return res;
str = PyUnicode_AS_UNICODE(res);
- ressize = size;
while (p<endp) {
/* try to encode it */
PyObject *x = NULL;
- if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) {
+ if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
Py_XDECREF(x);
goto onError;
}
@@ -3340,7 +3341,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
/* find all untranslatable characters */
while (collend < endp) {
- if (charmaptranslate_lookup(*collend, mapping, &x))
+ if (charmaptranslate_lookup(*collend, mapping, &x))
goto onError;
Py_XDECREF(x);
if (x!=Py_None)
@@ -3379,7 +3380,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
char buffer[2+29+1+1];
char *cp;
sprintf(buffer, "&#%d;", (int)*p);
- if (charmaptranslate_makespace(&res, &str, &ressize,
+ if (charmaptranslate_makespace(&res, &str,
(str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
goto onError;
for (cp = buffer; *cp; ++cp)
@@ -3395,7 +3396,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
goto onError;
/* generate replacement */
repsize = PyUnicode_GET_SIZE(repunicode);
- if (charmaptranslate_makespace(&res, &str, &ressize,
+ if (charmaptranslate_makespace(&res, &str,
(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
Py_DECREF(repunicode);
goto onError;
@@ -3409,7 +3410,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
}
/* Resize if we allocated to much */
respos = str-PyUnicode_AS_UNICODE(res);
- if (respos<ressize) {
+ if (respos<PyUnicode_GET_SIZE(res)) {
if (_PyUnicode_Resize(&res, respos) < 0)
goto onError;
}