summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Victor Stinner <vstinner@wyplay.com> 2011-09-29 12:14:38 (GMT)
committer Victor Stinner <vstinner@wyplay.com> 2011-09-29 12:14:38 (GMT)
commit a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd (patch)
tree 8281535ac9844dbbb50bffc0ea3756cd80aee552
parent ff1ef074ede8574ad3a346810e08a45ab171416d (diff)
download cpython-a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd.zip
cpython-a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd.tar.gz
cpython-a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd.tar.bz2
Add a note to the PyUnicode_CopyCharacters() doc: it doesn't write a null character.
Also clean up the code (avoid the goto).
-rw-r--r-- Include/unicodeobject.h 8
-rw-r--r-- Objects/unicodeobject.c 83
2 files changed, 47 insertions, 44 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 99f54c3..a8c3e8b 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -521,9 +521,9 @@ PyAPI_FUNC(int) _PyUnicode_Ready(
/* Copy character from one unicode object into another, this function performs
character conversion when necessary and falls back to memcpy if possible.
- Fail if 'to' is smaller than how_many or smaller than len(from)-from_start,
- or if kind(from[from_start:from_start+how_many]) > kind(to), or if to has
- more than 1 reference.
+ Fail if to is too small (smaller than how_many or smaller than
+ len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
+ kind(to), or if to has more than 1 reference.
Return the number of written character, or return -1 and raise an exception
on error.
@@ -533,6 +533,8 @@ PyAPI_FUNC(int) _PyUnicode_Ready(
how_many = min(how_many, len(from) - from_start)
to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
return how_many
+
+ Note: The function doesn't write a terminating null character.
*/
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 387974d..395f146 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -615,8 +615,8 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many)
{
- unsigned int from_kind;
- unsigned int to_kind;
+ unsigned int from_kind, to_kind;
+ void *from_data, *to_data;
assert(PyUnicode_Check(from));
assert(PyUnicode_Check(to));
@@ -645,44 +645,20 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
_PyUnicode_DIRTY(to);
from_kind = PyUnicode_KIND(from);
+ from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to);
+ to_data = PyUnicode_DATA(to);
if (from_kind == to_kind) {
/* fast path */
- Py_MEMCPY((char*)PyUnicode_DATA(to)
+ Py_MEMCPY((char*)to_data
+ PyUnicode_KIND_SIZE(to_kind, to_start),
- (char*)PyUnicode_DATA(from)
+ (char*)from_data
+ PyUnicode_KIND_SIZE(from_kind, from_start),
PyUnicode_KIND_SIZE(to_kind, how_many));
- return how_many;
}
-
- if (from_kind > to_kind) {
- /* slow path to check for character overflow */
- const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
- void *from_data = PyUnicode_DATA(from);
- void *to_data = PyUnicode_DATA(to);
- Py_UCS4 ch, maxchar;
- Py_ssize_t i;
- int overflow;
-
- maxchar = 0;
- overflow = 0;
- for (i=0; i < how_many; i++) {
- ch = PyUnicode_READ(from_kind, from_data, from_start + i);
- if (ch > maxchar) {
- maxchar = ch;
- if (maxchar > to_maxchar) {
- overflow = 1;
- break;
- }
- }
- PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
- }
- if (!overflow)
- return how_many;
- }
- else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_2BYTE_KIND)
+ else if (from_kind == PyUnicode_1BYTE_KIND
+ && to_kind == PyUnicode_2BYTE_KIND)
{
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS2,
@@ -690,7 +666,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
PyUnicode_2BYTE_DATA(to) + to_start
);
- return how_many;
}
else if (from_kind == PyUnicode_1BYTE_KIND
&& to_kind == PyUnicode_4BYTE_KIND)
@@ -701,7 +676,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
PyUnicode_4BYTE_DATA(to) + to_start
);
- return how_many;
}
else if (from_kind == PyUnicode_2BYTE_KIND
&& to_kind == PyUnicode_4BYTE_KIND)
@@ -712,14 +686,41 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
PyUnicode_4BYTE_DATA(to) + to_start
);
- return how_many;
}
- PyErr_Format(PyExc_ValueError,
- "Cannot copy UCS%u characters "
- "into a string of UCS%u characters",
- 1 << (from_kind - 1),
- 1 << (to_kind -1));
- return -1;
+ else {
+ int invalid_kinds;
+ if (from_kind > to_kind) {
+ /* slow path to check for character overflow */
+ const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
+ Py_UCS4 ch, maxchar;
+ Py_ssize_t i;
+
+ maxchar = 0;
+ invalid_kinds = 0;
+ for (i=0; i < how_many; i++) {
+ ch = PyUnicode_READ(from_kind, from_data, from_start + i);
+ if (ch > maxchar) {
+ maxchar = ch;
+ if (maxchar > to_maxchar) {
+ invalid_kinds = 1;
+ break;
+ }
+ }
+ PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
+ }
+ }
+ else
+ invalid_kinds = 1;
+ if (invalid_kinds) {
+ PyErr_Format(PyExc_ValueError,
+ "Cannot copy UCS%u characters "
+ "into a string of UCS%u characters",
+ 1 << (from_kind - 1),
+ 1 << (to_kind -1));
+ return -1;
+ }
+ }
+ return how_many;
}
/* Find the maximum code point and count the number of surrogate pairs so a