summaryrefslogtreecommitdiffstats
path: root/Include/unicodeobject.h
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-10-03 01:20:16 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2011-10-03 01:20:16 (GMT)
commit910337b42ebd845f87fc2fadcbf6e3061b5d4c97 (patch)
tree7a423b11b85113a71cfd363cd0791a64e3049c9b /Include/unicodeobject.h
parent4fae54cb0ee1f8b9959025fd3e75795e4c634a0f (diff)
downloadcpython-910337b42ebd845f87fc2fadcbf6e3061b5d4c97.zip
cpython-910337b42ebd845f87fc2fadcbf6e3061b5d4c97.tar.gz
cpython-910337b42ebd845f87fc2fadcbf6e3061b5d4c97.tar.bz2
Add _PyUnicode_CheckConsistency() macro to help debugging
* Document Unicode string states
* Use _PyUnicode_CheckConsistency() to ensure that objects are always consistent.
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r--Include/unicodeobject.h46
1 files changed, 46 insertions, 0 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 99dcdd8..ba73e56 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -206,6 +206,52 @@ extern "C" {
immediately follow the structure. utf8_length and wstr_length can be found
in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
+ /* Unicode strings can be in 4 states:
+
+ - compact ascii:
+
+ * structure = PyASCIIObject
+ * kind = PyUnicode_1BYTE_KIND
+ * compact = 1
+ * ascii = 1
+ * ready = 1
+ * utf8 = data
+
+ - compact:
+
+ * structure = PyCompactUnicodeObject
+ * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+ PyUnicode_4BYTE_KIND
+ * compact = 1
+ * ready = 1
+ * (ascii = 0)
+
+ - string created by the legacy API (not ready):
+
+ * structure = PyUnicodeObject
+ * kind = PyUnicode_WCHAR_KIND
+ * compact = 0
+ * ready = 0
+ * wstr is not NULL
+ * data.any is NULL
+ * utf8 is NULL
+ * interned = SSTATE_NOT_INTERNED
+ * (ascii = 0)
+
+ - string created by the legacy API, ready:
+
+ * structure = PyUnicodeObject
+ * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+ PyUnicode_4BYTE_KIND
+ * compact = 0
+ * ready = 1
+ * data.any is not NULL
+ * (ascii = 0)
+
+ Strings created by the legacy API become ready when
+ PyUnicode_READY() is called on them.
+
+ See also _PyUnicode_CheckConsistency(). */
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */