From a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 29 Sep 2011 14:14:38 +0200 Subject: Add a note in PyUnicode_CopyCharacters() doc: it doesn't write a null character Also clean up the code (avoid the goto). --- Include/unicodeobject.h | 8 +++-- Objects/unicodeobject.c | 83 +++++++++++++++++++++++++------------------------ 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 99f54c3..a8c3e8b 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -521,9 +521,9 @@ PyAPI_FUNC(int) _PyUnicode_Ready( /* Copy character from one unicode object into another, this function performs character conversion when necessary and falls back to memcpy if possible. - Fail if 'to' is smaller than how_many or smaller than len(from)-from_start, - or if kind(from[from_start:from_start+how_many]) > kind(to), or if to has - more than 1 reference. + Fail if to is too small (smaller than how_many or smaller than + len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > + kind(to), or if to has more than 1 reference. Return the number of written character, or return -1 and raise an exception on error. @@ -533,6 +533,8 @@ PyAPI_FUNC(int) _PyUnicode_Ready( how_many = min(how_many, len(from) - from_start) to[to_start:to_start+how_many] = from[from_start:from_start+how_many] return how_many + + Note: The function doesn't write a terminating null character. 
*/ #ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 387974d..395f146 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -615,8 +615,8 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many) { - unsigned int from_kind; - unsigned int to_kind; + unsigned int from_kind, to_kind; + void *from_data, *to_data; assert(PyUnicode_Check(from)); assert(PyUnicode_Check(to)); @@ -645,44 +645,20 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, _PyUnicode_DIRTY(to); from_kind = PyUnicode_KIND(from); + from_data = PyUnicode_DATA(from); to_kind = PyUnicode_KIND(to); + to_data = PyUnicode_DATA(to); if (from_kind == to_kind) { /* fast path */ - Py_MEMCPY((char*)PyUnicode_DATA(to) + Py_MEMCPY((char*)to_data + PyUnicode_KIND_SIZE(to_kind, to_start), - (char*)PyUnicode_DATA(from) + (char*)from_data + PyUnicode_KIND_SIZE(from_kind, from_start), PyUnicode_KIND_SIZE(to_kind, how_many)); - return how_many; } - - if (from_kind > to_kind) { - /* slow path to check for character overflow */ - const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); - void *from_data = PyUnicode_DATA(from); - void *to_data = PyUnicode_DATA(to); - Py_UCS4 ch, maxchar; - Py_ssize_t i; - int overflow; - - maxchar = 0; - overflow = 0; - for (i=0; i < how_many; i++) { - ch = PyUnicode_READ(from_kind, from_data, from_start + i); - if (ch > maxchar) { - maxchar = ch; - if (maxchar > to_maxchar) { - overflow = 1; - break; - } - } - PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); - } - if (!overflow) - return how_many; - } - else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_2BYTE_KIND) + else if (from_kind == PyUnicode_1BYTE_KIND + && to_kind == PyUnicode_2BYTE_KIND) { _PyUnicode_CONVERT_BYTES( Py_UCS1, Py_UCS2, @@ -690,7 +666,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, 
PyUnicode_1BYTE_DATA(from) + from_start + how_many, PyUnicode_2BYTE_DATA(to) + to_start ); - return how_many; } else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_4BYTE_KIND) @@ -701,7 +676,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyUnicode_1BYTE_DATA(from) + from_start + how_many, PyUnicode_4BYTE_DATA(to) + to_start ); - return how_many; } else if (from_kind == PyUnicode_2BYTE_KIND && to_kind == PyUnicode_4BYTE_KIND) @@ -712,14 +686,41 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, PyUnicode_2BYTE_DATA(from) + from_start + how_many, PyUnicode_4BYTE_DATA(to) + to_start ); - return how_many; } - PyErr_Format(PyExc_ValueError, - "Cannot copy UCS%u characters " - "into a string of UCS%u characters", - 1 << (from_kind - 1), - 1 << (to_kind -1)); - return -1; + else { + int invalid_kinds; + if (from_kind > to_kind) { + /* slow path to check for character overflow */ + const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); + Py_UCS4 ch, maxchar; + Py_ssize_t i; + + maxchar = 0; + invalid_kinds = 0; + for (i=0; i < how_many; i++) { + ch = PyUnicode_READ(from_kind, from_data, from_start + i); + if (ch > maxchar) { + maxchar = ch; + if (maxchar > to_maxchar) { + invalid_kinds = 1; + break; + } + } + PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); + } + } + else + invalid_kinds = 1; + if (invalid_kinds) { + PyErr_Format(PyExc_ValueError, + "Cannot copy UCS%u characters " + "into a string of UCS%u characters", + 1 << (from_kind - 1), + 1 << (to_kind -1)); + return -1; + } + } + return how_many; } /* Find the maximum code point and count the number of surrogate pairs so a -- cgit v0.12