summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 101
1 files changed, 79 insertions, 22 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 76fb175..4b99ad8 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -76,6 +76,7 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
#ifdef MS_WIN32
#include <windows.h>
#endif
+
/* Limit for the Unicode object free list */
#define MAX_UNICODE_FREELIST_SIZE 1024
@@ -87,18 +88,17 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
limit. This reduces malloc() overhead for small Unicode objects.
At worst this will result in MAX_UNICODE_FREELIST_SIZE *
- (sizeof(PyUnicodeObject) + STAYALIVE_SIZE_LIMIT +
+ (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
malloc()-overhead) bytes of unused garbage.
Setting the limit to 0 effectively turns the feature off.
- XXX The feature is currently turned off because there are
- apparently some lingering bugs in its implementation which I
- haven't yet been able to sort out.
+ Note: This is an experimental feature! If you get core dumps when
+ using Unicode objects, set KEEPALIVE_SIZE_LIMIT to 0 to turn it off.
*/
-#define STAYALIVE_SIZE_LIMIT 0
+#define KEEPALIVE_SIZE_LIMIT 9
/* Endianness switches; defaults to little endian */
@@ -125,9 +125,9 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode,
{
void *oldstr;
- /* Shortcut if there's nothing to do. */
+ /* Shortcut if there's nothing much to do. */
if (unicode->length == length)
- return 0;
+ goto reset;
/* Resizing unicode_empty is not allowed. */
if (unicode == unicode_empty) {
@@ -148,6 +148,7 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode,
unicode->str[length] = 0;
unicode->length = length;
+ reset:
/* Reset the object caches */
if (unicode->utf8str) {
Py_DECREF(unicode->utf8str);
@@ -158,6 +159,23 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode,
return 0;
}
+int PyUnicode_Resize(PyObject **unicode,
+ int length)
+{
+ PyUnicodeObject *v;
+ /* Checked wrapper: validate *unicode, then delegate to _PyUnicode_Resize(). */
+ if (unicode == NULL) {
+ PyErr_BadInternalCall();
+ return -1;
+ }
+ v = (PyUnicodeObject *)*unicode;
+ if (v == NULL || !PyUnicode_Check(v) || v->ob_refcnt != 1) { /* reject NULL, non-Unicode objects, and any refcount other than exactly 1 */
+ PyErr_BadInternalCall();
+ return -1;
+ }
+ return _PyUnicode_Resize(v, length); /* *unicode itself is not reassigned in this function */
+}
+
/* We allocate one more byte to make sure the string is
Ux0000 terminated -- XXX is this needed ?
@@ -185,7 +203,9 @@ PyUnicodeObject *_PyUnicode_New(int length)
unicode->ob_type = &PyUnicode_Type;
_Py_NewReference((PyObject *)unicode);
if (unicode->str) {
- if (unicode->length < length &&
+ /* Keep-Alive optimization: we only upsize the buffer,
+ never downsize it. */
+ if ((unicode->length < length) &&
_PyUnicode_Resize(unicode, length)) {
free(unicode->str);
PyMem_DEL(unicode);
@@ -220,19 +240,25 @@ PyUnicodeObject *_PyUnicode_New(int length)
static
void _PyUnicode_Free(register PyUnicodeObject *unicode)
{
- Py_XDECREF(unicode->utf8str);
if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
- if (unicode->length >= STAYALIVE_SIZE_LIMIT) {
+ /* Keep-Alive optimization */
+ if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
free(unicode->str);
unicode->str = NULL;
unicode->length = 0;
}
+ if (unicode->utf8str) {
+ Py_DECREF(unicode->utf8str);
+ unicode->utf8str = NULL;
+ }
+ /* Add to free list */
*(PyUnicodeObject **)unicode = unicode_freelist;
unicode_freelist = unicode;
unicode_freelist_size++;
}
else {
free(unicode->str);
+ Py_XDECREF(unicode->utf8str);
PyMem_DEL(unicode);
}
}
@@ -665,7 +691,8 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
}
}
*p = '\0';
- _PyString_Resize(&v, p - q);
+ if (_PyString_Resize(&v, p - q))
+ goto onError;
done:
return v;
@@ -1047,7 +1074,8 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
break;
}
}
- _PyUnicode_Resize(v, (int)(p - buf));
+ if (_PyUnicode_Resize(v, (int)(p - buf)))
+ goto onError;
return (PyObject *)v;
onError:
@@ -1119,9 +1147,14 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = q[1];
*p = '\0';
- _PyString_Resize(&repr, p - q);
+ if (_PyString_Resize(&repr, p - q))
+ goto onError;
return repr;
+
+ onError:
+ Py_DECREF(repr);
+ return NULL;
}
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
@@ -1209,7 +1242,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
s += i;
*p++ = x;
}
- _PyUnicode_Resize(v, (int)(p - buf));
+ if (_PyUnicode_Resize(v, (int)(p - buf)))
+ goto onError;
return (PyObject *)v;
onError:
@@ -1247,9 +1281,14 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
*p++ = (char) ch;
}
*p = '\0';
- _PyString_Resize(&repr, p - q);
+ if (_PyString_Resize(&repr, p - q))
+ goto onError;
return repr;
+
+ onError:
+ Py_DECREF(repr);
+ return NULL;
}
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
@@ -1305,6 +1344,7 @@ int latin1_encoding_error(const Py_UNICODE **source,
}
else if (strcmp(errors,"replace") == 0) {
**dest = '?';
+ (*dest)++;
return 0;
}
else {
@@ -1321,12 +1361,13 @@ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
const char *errors)
{
PyObject *repr;
- char *s;
+ char *s, *start;
repr = PyString_FromStringAndSize(NULL, size);
if (repr == NULL)
return NULL;
s = PyString_AS_STRING(repr);
+ start = s;
while (size-- > 0) {
Py_UNICODE ch = *p++;
if (ch >= 256) {
@@ -1337,6 +1378,10 @@ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
else
*s++ = (char)ch;
}
+ /* Resize if error handling skipped some characters */
+ if (s - start < PyString_GET_SIZE(repr))
+ if (_PyString_Resize(&repr, s - start))
+ goto onError;
return repr;
onError:
@@ -1411,8 +1456,9 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
"ordinal not in range(128)"))
goto onError;
}
- if (p - PyUnicode_AS_UNICODE(v) < size)
- _PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)));
+ if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
+ if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))))
+ goto onError;
return (PyObject *)v;
onError:
@@ -1438,6 +1484,7 @@ int ascii_encoding_error(const Py_UNICODE **source,
}
else if (strcmp(errors,"replace") == 0) {
**dest = '?';
+ (*dest)++;
return 0;
}
else {
@@ -1454,12 +1501,13 @@ PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
const char *errors)
{
PyObject *repr;
- char *s;
+ char *s, *start;
repr = PyString_FromStringAndSize(NULL, size);
if (repr == NULL)
return NULL;
s = PyString_AS_STRING(repr);
+ start = s;
while (size-- > 0) {
Py_UNICODE ch = *p++;
if (ch >= 128) {
@@ -1470,6 +1518,10 @@ PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
else
*s++ = (char)ch;
}
+ /* Resize if error handling skipped some characters */
+ if (s - start < PyString_GET_SIZE(repr))
+ if (_PyString_Resize(&repr, s - start))
+ goto onError;
return repr;
onError:
@@ -1898,7 +1950,8 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *s,
Py_DECREF(x);
}
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
- _PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)));
+ if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))))
+ goto onError;
done:
return (PyObject *)v;
@@ -1959,7 +2012,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
continue;
}
if (0 < ch && ch < 256) {
- *output++ = (char) ch;
+ *output++ = ch;
continue;
}
/* All other characters are considered invalid */
@@ -4539,7 +4592,8 @@ PyObject *PyUnicode_Format(PyObject *format,
Py_DECREF(args);
}
Py_DECREF(uformat);
- _PyUnicode_Resize(result, reslen - rescnt);
+ if (_PyUnicode_Resize(result, reslen - rescnt))
+ goto onError;
return (PyObject *)result;
onError:
@@ -4605,6 +4659,9 @@ _PyUnicode_Fini()
while (u != NULL) {
PyUnicodeObject *v = u;
u = *(PyUnicodeObject **)u;
+ if (v->str)
+ free(v->str);
+ Py_XDECREF(v->utf8str);
free(v);
}
Py_XDECREF(unicode_empty);