diff options
author | Victor Stinner <vstinner@wyplay.com> | 2011-09-29 12:14:38 (GMT) |
---|---|---|
committer | Victor Stinner <vstinner@wyplay.com> | 2011-09-29 12:14:38 (GMT) |
commit | a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd (patch) | |
tree | 8281535ac9844dbbb50bffc0ea3756cd80aee552 | |
parent | ff1ef074ede8574ad3a346810e08a45ab171416d (diff) | |
download | cpython-a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd.zip cpython-a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd.tar.gz cpython-a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd.tar.bz2 |
Add a note to the PyUnicode_CopyCharacters() doc: it doesn't write a terminating null character
Also clean up the code (avoid the goto).
-rw-r--r-- | Include/unicodeobject.h | 8 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 83 |
2 files changed, 47 insertions, 44 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 99f54c3..a8c3e8b 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -521,9 +521,9 @@ PyAPI_FUNC(int) _PyUnicode_Ready( /* Copy character from one unicode object into another, this function performs character conversion when necessary and falls back to memcpy if possible. - Fail if 'to' is smaller than how_many or smaller than len(from)-from_start, - or if kind(from[from_start:from_start+how_many]) > kind(to), or if to has - more than 1 reference. + Fail if to is too small (smaller than how_many or smaller than + len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > + kind(to), or if to has more than 1 reference. Return the number of written character, or return -1 and raise an exception on error. @@ -533,6 +533,8 @@ PyAPI_FUNC(int) _PyUnicode_Ready( how_many = min(how_many, len(from) - from_start) to[to_start:to_start+how_many] = from[from_start:from_start+how_many] return how_many + + Note: The function doesn't write a terminating null character. 
*/ #ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 387974d..395f146 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -615,8 +615,8 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many) { - unsigned int from_kind; - unsigned int to_kind; + unsigned int from_kind, to_kind; + void *from_data, *to_data; assert(PyUnicode_Check(from)); assert(PyUnicode_Check(to)); @@ -645,44 +645,20 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, _PyUnicode_DIRTY(to); from_kind = PyUnicode_KIND(from); + from_data = PyUnicode_DATA(from); to_kind = PyUnicode_KIND(to); + to_data = PyUnicode_DATA(to); if (from_kind == to_kind) { /* fast path */ - Py_MEMCPY((char*)PyUnicode_DATA(to) + Py_MEMCPY((char*)to_data + PyUnicode_KIND_SIZE(to_kind, to_start), - (char*)PyUnicode_DATA(from) + (char*)from_data + PyUnicode_KIND_SIZE(from_kind, from_start), PyUnicode_KIND_SIZE(to_kind, how_many)); - return how_many; } - - if (from_kind > to_kind) { - /* slow path to check for character overflow */ - const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); - void *from_data = PyUnicode_DATA(from); - void *to_data = PyUnicode_DATA(to); - Py_UCS4 ch, maxchar; - Py_ssize_t i; - int overflow; - - maxchar = 0; - overflow = 0; - for (i=0; i < how_many; i++) { - ch = PyUnicode_READ(from_kind, from_data, from_start + i); - if (ch > maxchar) { - maxchar = ch; - if (maxchar > to_maxchar) { - overflow = 1; - break; - } - } - PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); - } - if (!overflow) - return how_many; - } - else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_2BYTE_KIND) + else if (from_kind == PyUnicode_1BYTE_KIND + && to_kind == PyUnicode_2BYTE_KIND) { _PyUnicode_CONVERT_BYTES( Py_UCS1, Py_UCS2, @@ -690,7 +666,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, 
PyUnicode_1BYTE_DATA(from) + from_start + how_many, PyUnicode_2BYTE_DATA(to) + to_start ); - return how_many; } else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_4BYTE_KIND) @@ -701,7 +676,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyUnicode_1BYTE_DATA(from) + from_start + how_many, PyUnicode_4BYTE_DATA(to) + to_start ); - return how_many; } else if (from_kind == PyUnicode_2BYTE_KIND && to_kind == PyUnicode_4BYTE_KIND) @@ -712,14 +686,41 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyUnicode_2BYTE_DATA(from) + from_start + how_many, PyUnicode_4BYTE_DATA(to) + to_start ); - return how_many; } - PyErr_Format(PyExc_ValueError, - "Cannot copy UCS%u characters " - "into a string of UCS%u characters", - 1 << (from_kind - 1), - 1 << (to_kind -1)); - return -1; + else { + int invalid_kinds; + if (from_kind > to_kind) { + /* slow path to check for character overflow */ + const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); + Py_UCS4 ch, maxchar; + Py_ssize_t i; + + maxchar = 0; + invalid_kinds = 0; + for (i=0; i < how_many; i++) { + ch = PyUnicode_READ(from_kind, from_data, from_start + i); + if (ch > maxchar) { + maxchar = ch; + if (maxchar > to_maxchar) { + invalid_kinds = 1; + break; + } + } + PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); + } + } + else + invalid_kinds = 1; + if (invalid_kinds) { + PyErr_Format(PyExc_ValueError, + "Cannot copy UCS%u characters " + "into a string of UCS%u characters", + 1 << (from_kind - 1), + 1 << (to_kind -1)); + return -1; + } + } + return how_many; } /* Find the maximum code point and count the number of surrogate pairs so a |