summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2012-10-03 21:03:17 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2012-10-03 21:03:17 (GMT)
commit afffce489b369d0237d49fd512219619cfa92965 (patch)
tree e047fd111ccb577cdfcae0720fdb59fb264567dc /Objects
parent 75f108174aedf9ffc4433c7cb70ab197f0175802 (diff)
download cpython-afffce489b369d0237d49fd512219619cfa92965.zip
cpython-afffce489b369d0237d49fd512219619cfa92965.tar.gz
cpython-afffce489b369d0237d49fd512219619cfa92965.tar.bz2
Unicode: resize_compact() and resize_inplace() now also fill the Unicode
strings with invalid bytes in debug mode, as done by PyUnicode_New()
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 38
1 files changed, 33 insertions, 5 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 83f2a2a..09067e9 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -640,6 +640,25 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
}
}
+#ifdef Py_DEBUG
+/* Fill the data of a Unicode string with invalid characters (0xff bytes) to
+ detect bugs earlier. Only the characters in [old_length, length) are filled;
+ pass old_length=0 to fill the whole string.
+ _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for
+ ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an
+ invalid character in Unicode 6.0. */
+static void
+unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length)
+{
+ int kind = PyUnicode_KIND(unicode); /* used below as bytes per character */
+ Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
+ Py_ssize_t length = _PyUnicode_LENGTH(unicode); /* current length */
+ if (length <= old_length) /* string did not grow: nothing to fill */
+ return;
+ memset(data + old_length * kind, 0xff, (length - old_length) * kind);
+}
+#endif
+
static PyObject*
resize_compact(PyObject *unicode, Py_ssize_t length)
{
@@ -648,6 +667,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
Py_ssize_t new_size;
int share_wstr;
PyObject *new_unicode;
+#ifdef Py_DEBUG
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
+
assert(unicode_modifiable(unicode));
assert(PyUnicode_IS_READY(unicode));
assert(PyUnicode_IS_COMPACT(unicode));
@@ -683,6 +706,9 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
if (!PyUnicode_IS_ASCII(unicode))
_PyUnicode_WSTR_LENGTH(unicode) = length;
}
+#ifdef Py_DEBUG
+ unicode_fill_invalid(unicode, old_length);
+#endif
PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
length, 0);
assert(_PyUnicode_CheckConsistency(unicode, 0));
@@ -701,6 +727,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
Py_ssize_t char_size;
int share_wstr, share_utf8;
void *data;
+#ifdef Py_DEBUG
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
data = _PyUnicode_DATA_ANY(unicode);
char_size = PyUnicode_KIND(unicode);
@@ -736,6 +765,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
}
_PyUnicode_LENGTH(unicode) = length;
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
+#ifdef Py_DEBUG
+ unicode_fill_invalid(unicode, old_length);
+#endif
if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
assert(_PyUnicode_CheckConsistency(unicode, 0));
return 0;
@@ -1060,11 +1092,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
}
}
#ifdef Py_DEBUG
- /* Fill the data with invalid characters to detect bugs earlier.
- _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
- at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
- and U+FFFFFFFF is an invalid character in Unicode 6.0. */
- memset(data, 0xff, size * kind);
+ unicode_fill_invalid((PyObject*)unicode, 0);
#endif
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
return obj;