diff options
-rw-r--r-- | Objects/unicodeobject.c | 101 |
1 files changed, 79 insertions, 22 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 76fb175..4b99ad8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -76,6 +76,7 @@ Unicode Integration Proposal (see file Misc/unicode.txt). #ifdef MS_WIN32 #include <windows.h> #endif + /* Limit for the Unicode object free list */ #define MAX_UNICODE_FREELIST_SIZE 1024 @@ -87,18 +88,17 @@ Unicode Integration Proposal (see file Misc/unicode.txt). limit. This reduces malloc() overhead for small Unicode objects. At worst this will result in MAX_UNICODE_FREELIST_SIZE * - (sizeof(PyUnicodeObject) + STAYALIVE_SIZE_LIMIT + + (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + malloc()-overhead) bytes of unused garbage. Setting the limit to 0 effectively turns the feature off. - XXX The feature is currently turned off because there are - apparently some lingering bugs in its implementation which I - haven't yet been able to sort out. + Note: This is an experimental feature ! If you get core dumps when + using Unicode objects, turn this feature off. */ -#define STAYALIVE_SIZE_LIMIT 0 +#define KEEPALIVE_SIZE_LIMIT 9 /* Endianness switches; defaults to little endian */ @@ -125,9 +125,9 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode, { void *oldstr; - /* Shortcut if there's nothing to do. */ + /* Shortcut if there's nothing much to do. */ if (unicode->length == length) - return 0; + goto reset; /* Resizing unicode_empty is not allowed. */ if (unicode == unicode_empty) { @@ -148,6 +148,7 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode, unicode->str[length] = 0; unicode->length = length; + reset: /* Reset the object caches */ if (unicode->utf8str) { Py_DECREF(unicode->utf8str); @@ -158,6 +159,23 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode, return 0; } +int PyUnicode_Resize(PyObject **unicode, + int length) +{ + PyUnicodeObject *v; + + if (unicode == NULL) { + PyErr_BadInternalCall(); + return -1; + } + v = (PyUnicodeObject *)*unicode; + if (v == NULL || !PyUnicode_Check(v) || v->ob_refcnt != 1) { + PyErr_BadInternalCall(); + return -1; + } + return _PyUnicode_Resize(v, length); +} + /* We allocate one more byte to make sure the string is Ux0000 terminated -- XXX is this needed ? @@ -185,7 +203,9 @@ PyUnicodeObject *_PyUnicode_New(int length) unicode->ob_type = &PyUnicode_Type; _Py_NewReference((PyObject *)unicode); if (unicode->str) { - if (unicode->length < length && + /* Keep-Alive optimization: we only upsize the buffer, + never downsize it. */ + if ((unicode->length < length) && _PyUnicode_Resize(unicode, length)) { free(unicode->str); PyMem_DEL(unicode); @@ -220,19 +240,25 @@ PyUnicodeObject *_PyUnicode_New(int length) static void _PyUnicode_Free(register PyUnicodeObject *unicode) { - Py_XDECREF(unicode->utf8str); if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) { - if (unicode->length >= STAYALIVE_SIZE_LIMIT) { + /* Keep-Alive optimization */ + if (unicode->length >= KEEPALIVE_SIZE_LIMIT) { free(unicode->str); unicode->str = NULL; unicode->length = 0; } + if (unicode->utf8str) { + Py_DECREF(unicode->utf8str); + unicode->utf8str = NULL; + } + /* Add to free list */ *(PyUnicodeObject **)unicode = unicode_freelist; unicode_freelist = unicode; unicode_freelist_size++; } else { free(unicode->str); + Py_XDECREF(unicode->utf8str); PyMem_DEL(unicode); } } @@ -665,7 +691,8 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s, } } *p = '\0'; - _PyString_Resize(&v, p - q); + if (_PyString_Resize(&v, p - q)) + goto onError; done: return v; @@ -1047,7 +1074,8 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, break; } } - _PyUnicode_Resize(v, (int)(p - buf)); + if (_PyUnicode_Resize(v, (int)(p - buf))) + goto onError; return (PyObject *)v; onError: @@ -1119,9 +1147,14 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = q[1]; *p = '\0'; - _PyString_Resize(&repr, p - q); + if (_PyString_Resize(&repr, p - q)) + goto onError; return repr; + + onError: + Py_DECREF(repr); + return NULL; } PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, @@ -1209,7 +1242,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, s += i; *p++ = x; } - _PyUnicode_Resize(v, (int)(p - buf)); + if (_PyUnicode_Resize(v, (int)(p - buf))) + goto onError; return (PyObject *)v; onError: @@ -1247,9 +1281,14 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, *p++ = (char) ch; } *p = '\0'; - _PyString_Resize(&repr, p - q); + if (_PyString_Resize(&repr, p - q)) + goto onError; return repr; + + onError: + Py_DECREF(repr); + return NULL; } PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) @@ -1305,6 +1344,7 @@ int latin1_encoding_error(const Py_UNICODE **source, } else if (strcmp(errors,"replace") == 0) { **dest = '?'; + (*dest)++; return 0; } else { @@ -1321,12 +1361,13 @@ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p, const char *errors) { PyObject *repr; - char *s; + char *s, *start; repr = PyString_FromStringAndSize(NULL, size); if (repr == NULL) return NULL; s = PyString_AS_STRING(repr); + start = s; while (size-- > 0) { Py_UNICODE ch = *p++; if (ch >= 256) { @@ -1337,6 +1378,10 @@ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p, else *s++ = (char)ch; } + /* Resize if error handling skipped some characters */ + if (s - start < PyString_GET_SIZE(repr)) + if (_PyString_Resize(&repr, s - start)) + goto onError; return repr; onError: @@ -1411,8 +1456,9 @@ PyObject *PyUnicode_DecodeASCII(const char *s, "ordinal not in range(128)")) goto onError; } - if (p - PyUnicode_AS_UNICODE(v) < size) - _PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))); + if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v)) + if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)))) + goto onError; return (PyObject *)v; onError: @@ -1438,6 +1484,7 @@ int ascii_encoding_error(const Py_UNICODE **source, } else if (strcmp(errors,"replace") == 0) { **dest = '?'; + (*dest)++; return 0; } else { @@ -1454,12 +1501,13 @@ PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p, const char *errors) { PyObject *repr; - char *s; + char *s, *start; repr = PyString_FromStringAndSize(NULL, size); if (repr == NULL) return NULL; s = PyString_AS_STRING(repr); + start = s; while (size-- > 0) { Py_UNICODE ch = *p++; if (ch >= 128) { @@ -1470,6 +1518,10 @@ PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p, else *s++ = (char)ch; } + /* Resize if error handling skipped some characters */ + if (s - start < PyString_GET_SIZE(repr)) + if (_PyString_Resize(&repr, s - start)) + goto onError; return repr; onError: @@ -1898,7 +1950,8 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *s, Py_DECREF(x); } if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) - _PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))); + if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)))) + goto onError; done: return (PyObject *)v; @@ -1959,7 +2012,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, continue; } if (0 < ch && ch < 256) { - *output++ = (char) ch; + *output++ = ch; continue; } /* All other characters are considered invalid */ @@ -4539,7 +4592,8 @@ PyObject *PyUnicode_Format(PyObject *format, Py_DECREF(args); } Py_DECREF(uformat); - _PyUnicode_Resize(result, reslen - rescnt); + if (_PyUnicode_Resize(result, reslen - rescnt)) + goto onError; return (PyObject *)result; onError: @@ -4605,6 +4659,9 @@ _PyUnicode_Fini() while (u != NULL) { PyUnicodeObject *v = u; u = *(PyUnicodeObject **)u; + if (v->str) + free(v->str); + Py_XDECREF(v->utf8str); free(v); } Py_XDECREF(unicode_empty); |