diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-10-03 21:03:17 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-10-03 21:03:17 (GMT) |
commit | afffce489b369d0237d49fd512219619cfa92965 (patch) | |
tree | e047fd111ccb577cdfcae0720fdb59fb264567dc /Objects | |
parent | 75f108174aedf9ffc4433c7cb70ab197f0175802 (diff) | |
download | cpython-afffce489b369d0237d49fd512219619cfa92965.zip cpython-afffce489b369d0237d49fd512219619cfa92965.tar.gz cpython-afffce489b369d0237d49fd512219619cfa92965.tar.bz2 |
Unicode: resize_compact() and resize_inplace() now also fill the Unicode strings
with invalid bytes in debug mode, as done by PyUnicode_New()
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 38 |
1 files changed, 33 insertions, 5 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 83f2a2a..09067e9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -640,6 +640,25 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind, } } +#ifdef Py_DEBUG +/* Fill the data of an Unicode string with invalid characters to detect bugs + earlier. + + _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for + ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an + invalid character in Unicode 6.0. */ +static void +unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length) +{ + int kind = PyUnicode_KIND(unicode); + Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode); + Py_ssize_t length = _PyUnicode_LENGTH(unicode); + if (length <= old_length) + return; + memset(data + old_length * kind, 0xff, (length - old_length) * kind); +} +#endif + static PyObject* resize_compact(PyObject *unicode, Py_ssize_t length) { @@ -648,6 +667,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t new_size; int share_wstr; PyObject *new_unicode; +#ifdef Py_DEBUG + Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); +#endif + assert(unicode_modifiable(unicode)); assert(PyUnicode_IS_READY(unicode)); assert(PyUnicode_IS_COMPACT(unicode)); @@ -683,6 +706,9 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = length; } +#ifdef Py_DEBUG + unicode_fill_invalid(unicode, old_length); +#endif PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), length, 0); assert(_PyUnicode_CheckConsistency(unicode, 0)); @@ -701,6 +727,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; int share_wstr, share_utf8; void *data; +#ifdef Py_DEBUG + Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); +#endif data = _PyUnicode_DATA_ANY(unicode); char_size = PyUnicode_KIND(unicode); @@ -736,6 +765,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) } 
_PyUnicode_LENGTH(unicode) = length; PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); +#ifdef Py_DEBUG + unicode_fill_invalid(unicode, old_length); +#endif if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; @@ -1060,11 +1092,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) } } #ifdef Py_DEBUG - /* Fill the data with invalid characters to detect bugs earlier. - _PyUnicode_CheckConsistency(str, 1) detects invalid characters, - at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII - and U+FFFFFFFF is an invalid character in Unicode 6.0. */ - memset(data, 0xff, size * kind); + unicode_fill_invalid((PyObject*)unicode, 0); #endif assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0)); return obj; |