diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-20 17:56:05 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-20 17:56:05 (GMT) |
commit | 77faf69ca1e024cd48c82c882bfcad34be05da63 (patch) | |
tree | 63bb76b978bb287516a1978617d4f6b53df3d6f9 | |
parent | d5c4022d2abc67ef72de326bb12023bb969e80a9 (diff) | |
download | cpython-77faf69ca1e024cd48c82c882bfcad34be05da63.zip cpython-77faf69ca1e024cd48c82c882bfcad34be05da63.tar.gz cpython-77faf69ca1e024cd48c82c882bfcad34be05da63.tar.bz2 |
_PyUnicode_CheckConsistency() now also checks the maximum value of maxchar,
not only its minimum value.
-rw-r--r-- | Include/unicodeobject.h | 13 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 12 |
2 files changed, 17 insertions, 8 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 71404f3..6a31e48 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -303,19 +303,22 @@ typedef struct { - PyUnicode_1BYTE_KIND (1): * character type = Py_UCS1 (8 bits, unsigned) - * if ascii is set, all characters must be in range - U+0000-U+007F, otherwise at least one character must be in range + * all characters are in the range U+0000-U+00FF (latin1) + * if ascii is set, all characters are in the range U+0000-U+007F + (ASCII), otherwise at least one character is in the range U+0080-U+00FF - PyUnicode_2BYTE_KIND (2): * character type = Py_UCS2 (16 bits, unsigned) - * at least one character must be in range U+0100-U+FFFF + * all characters are in the range U+0000-U+FFFF (BMP) + * at least one character is in the range U+0100-U+FFFF - PyUnicode_4BYTE_KIND (4): * character type = Py_UCS4 (32 bits, unsigned) - * at least one character must be in range U+10000-U+10FFFF + * all characters are in the range U+0000-U+10FFFF + * at least one character is in the range U+10000-U+10FFFF */ unsigned int kind:3; /* Compact is with respect to the allocation scheme. Compact unicode @@ -323,7 +326,7 @@ typedef struct { one block for the PyUnicodeObject struct and another for its data buffer. */ unsigned int compact:1; - /* The string only contains characters in range U+0000-U+007F (ASCII) + /* The string only contains characters in the range U+0000-U+007F (ASCII) and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is set, use the PyASCIIObject structure. 
*/ unsigned int ascii:1; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 18780ea..9c1705d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -392,15 +392,21 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) maxchar = ch; } if (kind == PyUnicode_1BYTE_KIND) { - if (ascii->state.ascii == 0) + if (ascii->state.ascii == 0) { assert(maxchar >= 128); + assert(maxchar <= 255); + } else assert(maxchar < 128); } - else if (kind == PyUnicode_2BYTE_KIND) + else if (kind == PyUnicode_2BYTE_KIND) { assert(maxchar >= 0x100); - else + assert(maxchar <= 0xFFFF); + } + else { assert(maxchar >= 0x10000); + assert(maxchar <= 0x10FFFF); + } } if (check_content && !unicode_is_singleton(op)) assert(ascii->hash == -1); |