diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-10-03 23:05:08 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-10-03 23:05:08 (GMT) |
commit | a41463c2037e7130283f16fef9747aac4de94b13 (patch) | |
tree | ab640f064d76426c15b06a933ab78aacdc65b670 | |
parent | 95663110143ebf254a251b747d16589226151727 (diff) | |
download | cpython-a41463c2037e7130283f16fef9747aac4de94b13.zip cpython-a41463c2037e7130283f16fef9747aac4de94b13.tar.gz cpython-a41463c2037e7130283f16fef9747aac4de94b13.tar.bz2 |
Document utf8_length and wstr_length states
Enforce these states with assertions in _PyUnicode_CheckConsistency().
-rw-r--r-- | Include/unicodeobject.h | 19 |
-rw-r--r-- | Objects/unicodeobject.c | 84 |
2 files changed, 56 insertions, 47 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 8e19ebc..3dee11f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -226,9 +226,11 @@ typedef struct { * ready = 1 * ascii = 0 * utf8 != data - * wstr is shared with data if kind=PyUnicode_2BYTE_KIND - and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and - sizeof(wchar_4)=4 + * utf8_length = 0 if utf8 is NULL + * wstr is shared with data and wstr_length=length + if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 + or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 + * wstr_length = 0 if wstr is NULL - legacy string, not ready: @@ -239,6 +241,7 @@ typedef struct { * wstr is not NULL * data.any is NULL * utf8 is NULL + * utf8_length = 0 * interned = SSTATE_NOT_INTERNED * ascii = 0 @@ -250,10 +253,12 @@ typedef struct { * compact = 0 * ready = 1 * data.any is not NULL - * utf8 is shared with data.any if ascii = 1 - * wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND - and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and - sizeof(wchar_4)=4 + * utf8 is shared and utf8_length = length with data.any if ascii = 1 + * utf8_length = 0 if utf8 is NULL + * wstr is shared and wstr_length = length with data.any + if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 + or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 + * wstr_length = 0 if wstr is NULL Compact strings use only one memory block (structure + characters), whereas legacy strings use one block for the structure and one block diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 42d061a..84c8dca 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -300,50 +300,47 @@ _PyUnicode_CheckConsistency(void *op) assert(kind == PyUnicode_1BYTE_KIND); assert(ascii->state.ready == 1); } - else if (ascii->state.compact == 1) { + else { PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; void *data; - assert(kind == PyUnicode_1BYTE_KIND - || kind == PyUnicode_2BYTE_KIND - || kind 
== PyUnicode_4BYTE_KIND); - assert(ascii->state.ascii == 0); - assert(ascii->state.ready == 1); - data = compact + 1; - assert (compact->utf8 != data); - if ( -#if SIZEOF_WCHAR_T == 2 - kind == PyUnicode_2BYTE_KIND -#else - kind == PyUnicode_4BYTE_KIND -#endif - ) - assert(ascii->wstr == data); - else - assert(ascii->wstr != data); - } else { - PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; - PyUnicodeObject *unicode = (PyUnicodeObject *)op; - if (kind == PyUnicode_WCHAR_KIND) { - assert(ascii->state.compact == 0); - assert(ascii->state.ascii == 0); - assert(ascii->state.ready == 0); - assert(ascii->wstr != NULL); - assert(unicode->data.any == NULL); - assert(compact->utf8 == NULL); - assert(ascii->state.interned == SSTATE_NOT_INTERNED); - } - else { + if (ascii->state.compact == 1) { + data = compact + 1; assert(kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); - assert(ascii->state.compact == 0); + assert(ascii->state.ascii == 0); assert(ascii->state.ready == 1); - assert(unicode->data.any != NULL); - if (ascii->state.ascii) - assert (compact->utf8 == unicode->data.any); - else - assert (compact->utf8 != unicode->data.any); + assert (compact->utf8 != data); + } else { + PyUnicodeObject *unicode = (PyUnicodeObject *)op; + + data = unicode->data.any; + if (kind == PyUnicode_WCHAR_KIND) { + assert(ascii->state.compact == 0); + assert(ascii->state.ascii == 0); + assert(ascii->state.ready == 0); + assert(ascii->wstr != NULL); + assert(data == NULL); + assert(compact->utf8 == NULL); + assert(ascii->state.interned == SSTATE_NOT_INTERNED); + } + else { + assert(kind == PyUnicode_1BYTE_KIND + || kind == PyUnicode_2BYTE_KIND + || kind == PyUnicode_4BYTE_KIND); + assert(ascii->state.compact == 0); + assert(ascii->state.ready == 1); + assert(data != NULL); + if (ascii->state.ascii) { + assert (compact->utf8 == data); + assert (compact->utf8_length == ascii->length); + } + else + assert (compact->utf8 != data); + } 
+ } + if (kind != PyUnicode_WCHAR_KIND) { if ( #if SIZEOF_WCHAR_T == 2 kind == PyUnicode_2BYTE_KIND @@ -351,10 +348,17 @@ _PyUnicode_CheckConsistency(void *op) kind == PyUnicode_4BYTE_KIND #endif ) - assert(ascii->wstr == unicode->data.any); - else - assert(ascii->wstr != unicode->data.any); + { + assert(ascii->wstr == data); + assert(compact->wstr_length == ascii->length); + } else + assert(ascii->wstr != data); } + + if (compact->utf8 == NULL) + assert(compact->utf8_length == 0); + if (ascii->wstr == NULL) + assert(compact->wstr_length == 0); } return 1; } |