summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/unicodeobject.h19
-rw-r--r--Objects/unicodeobject.c84
2 files changed, 56 insertions, 47 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 8e19ebc..3dee11f 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -226,9 +226,11 @@ typedef struct {
* ready = 1
* ascii = 0
* utf8 != data
- * wstr is shared with data if kind=PyUnicode_2BYTE_KIND
- and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
- sizeof(wchar_4)=4
+ * utf8_length = 0 if utf8 is NULL
+ * wstr is shared with data and wstr_length=length
+ if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+ or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+ * wstr_length = 0 if wstr is NULL
- legacy string, not ready:
@@ -239,6 +241,7 @@ typedef struct {
* wstr is not NULL
* data.any is NULL
* utf8 is NULL
+ * utf8_length = 0
* interned = SSTATE_NOT_INTERNED
* ascii = 0
@@ -250,10 +253,12 @@ typedef struct {
* compact = 0
* ready = 1
* data.any is not NULL
- * utf8 is shared with data.any if ascii = 1
- * wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND
- and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
- sizeof(wchar_4)=4
+ * utf8 is shared with data.any and utf8_length = length if ascii = 1
+ * utf8_length = 0 if utf8 is NULL
+ * wstr is shared with data.any and wstr_length = length
+ if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+ or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+ * wstr_length = 0 if wstr is NULL
Compact strings use only one memory block (structure + characters),
whereas legacy strings use one block for the structure and one block
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 42d061a..84c8dca 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -300,50 +300,47 @@ _PyUnicode_CheckConsistency(void *op)
assert(kind == PyUnicode_1BYTE_KIND);
assert(ascii->state.ready == 1);
}
- else if (ascii->state.compact == 1) {
+ else {
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
void *data;
- assert(kind == PyUnicode_1BYTE_KIND
- || kind == PyUnicode_2BYTE_KIND
- || kind == PyUnicode_4BYTE_KIND);
- assert(ascii->state.ascii == 0);
- assert(ascii->state.ready == 1);
- data = compact + 1;
- assert (compact->utf8 != data);
- if (
-#if SIZEOF_WCHAR_T == 2
- kind == PyUnicode_2BYTE_KIND
-#else
- kind == PyUnicode_4BYTE_KIND
-#endif
- )
- assert(ascii->wstr == data);
- else
- assert(ascii->wstr != data);
- } else {
- PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
- PyUnicodeObject *unicode = (PyUnicodeObject *)op;
- if (kind == PyUnicode_WCHAR_KIND) {
- assert(ascii->state.compact == 0);
- assert(ascii->state.ascii == 0);
- assert(ascii->state.ready == 0);
- assert(ascii->wstr != NULL);
- assert(unicode->data.any == NULL);
- assert(compact->utf8 == NULL);
- assert(ascii->state.interned == SSTATE_NOT_INTERNED);
- }
- else {
+ if (ascii->state.compact == 1) {
+ data = compact + 1;
assert(kind == PyUnicode_1BYTE_KIND
|| kind == PyUnicode_2BYTE_KIND
|| kind == PyUnicode_4BYTE_KIND);
- assert(ascii->state.compact == 0);
+ assert(ascii->state.ascii == 0);
assert(ascii->state.ready == 1);
- assert(unicode->data.any != NULL);
- if (ascii->state.ascii)
- assert (compact->utf8 == unicode->data.any);
- else
- assert (compact->utf8 != unicode->data.any);
+ assert (compact->utf8 != data);
+ } else {
+ PyUnicodeObject *unicode = (PyUnicodeObject *)op;
+
+ data = unicode->data.any;
+ if (kind == PyUnicode_WCHAR_KIND) {
+ assert(ascii->state.compact == 0);
+ assert(ascii->state.ascii == 0);
+ assert(ascii->state.ready == 0);
+ assert(ascii->wstr != NULL);
+ assert(data == NULL);
+ assert(compact->utf8 == NULL);
+ assert(ascii->state.interned == SSTATE_NOT_INTERNED);
+ }
+ else {
+ assert(kind == PyUnicode_1BYTE_KIND
+ || kind == PyUnicode_2BYTE_KIND
+ || kind == PyUnicode_4BYTE_KIND);
+ assert(ascii->state.compact == 0);
+ assert(ascii->state.ready == 1);
+ assert(data != NULL);
+ if (ascii->state.ascii) {
+ assert (compact->utf8 == data);
+ assert (compact->utf8_length == ascii->length);
+ }
+ else
+ assert (compact->utf8 != data);
+ }
+ }
+ if (kind != PyUnicode_WCHAR_KIND) {
if (
#if SIZEOF_WCHAR_T == 2
kind == PyUnicode_2BYTE_KIND
@@ -351,10 +348,17 @@ _PyUnicode_CheckConsistency(void *op)
kind == PyUnicode_4BYTE_KIND
#endif
)
- assert(ascii->wstr == unicode->data.any);
- else
- assert(ascii->wstr != unicode->data.any);
+ {
+ assert(ascii->wstr == data);
+ assert(compact->wstr_length == ascii->length);
+ } else
+ assert(ascii->wstr != data);
}
+
+ if (compact->utf8 == NULL)
+ assert(compact->utf8_length == 0);
+ if (ascii->wstr == NULL)
+ assert(compact->wstr_length == 0);
}
return 1;
}