summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2012-06-16 00:22:37 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2012-06-16 00:22:37 (GMT)
commit c9d369f1bf78c48083679b2afa5f31d8378ea94d (patch)
tree 610388d08918fbb6fdafa6e5f7fa7123d0ec8a19
parent f05e17ece9ee4cf4d04e0657e6c7c9283a233968 (diff)
download cpython-c9d369f1bf78c48083679b2afa5f31d8378ea94d.zip
cpython-c9d369f1bf78c48083679b2afa5f31d8378ea94d.tar.gz
cpython-c9d369f1bf78c48083679b2afa5f31d8378ea94d.tar.bz2
Optimize _PyUnicode_FastCopyCharacters() when maxchar(from) > maxchar(to)
-rw-r--r-- Objects/unicodeobject.c 130
-rw-r--r-- Python/formatter_unicode.c 6
2 files changed, 79 insertions, 57 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 9c1a5f0..80a583c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1128,7 +1128,6 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
{
unsigned int from_kind, to_kind;
void *from_data, *to_data;
- int fast;
assert(0 <= how_many);
assert(0 <= from_start);
@@ -1137,41 +1136,40 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
assert(PyUnicode_IS_READY(from));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
- if (how_many == 0)
- return 0;
-
assert(PyUnicode_Check(to));
assert(PyUnicode_IS_READY(to));
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
+ if (how_many == 0)
+ return 0;
+
from_kind = PyUnicode_KIND(from);
from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to);
to_data = PyUnicode_DATA(to);
-#ifdef Py_DEBUG
- if (!check_maxchar
- && (from_kind > to_kind
- || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))))
- {
- const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
- Py_UCS4 ch;
- Py_ssize_t i;
- for (i=0; i < how_many; i++) {
- ch = PyUnicode_READ(from_kind, from_data, from_start + i);
- assert(ch <= to_maxchar);
- }
- }
+ if (from_kind == to_kind) {
+ if (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)) {
+ /* Writing Latin-1 characters into an ASCII string requires
checking that all written characters are pure ASCII */
+#ifndef Py_DEBUG
+ if (check_maxchar) {
+ Py_UCS4 max_char;
+ max_char = ucs1lib_find_max_char(from_data,
+ (char*)from_data + how_many);
+ if (max_char >= 128)
+ return -1;
+ }
+#else
+ const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
+ Py_UCS4 ch;
+ Py_ssize_t i;
+ for (i=0; i < how_many; i++) {
+ ch = PyUnicode_READ(from_kind, from_data, from_start + i);
+ assert(ch <= to_maxchar);
+ }
#endif
- fast = (from_kind == to_kind);
- if (check_maxchar
- && (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
- {
- /* deny latin1 => ascii */
- fast = 0;
- }
-
- if (fast) {
+ }
Py_MEMCPY((char*)to_data + to_kind * to_start,
(char*)from_data + from_kind * from_start,
to_kind * how_many);
@@ -1207,42 +1205,62 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
);
}
else {
- /* check if max_char(from substring) <= max_char(to) */
- if (from_kind > to_kind
- /* latin1 => ascii */
- || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
+ assert (PyUnicode_MAX_CHAR_VALUE(from) > PyUnicode_MAX_CHAR_VALUE(to));
+
+#ifndef Py_DEBUG
+ if (!check_maxchar) {
+ if (from_kind == PyUnicode_2BYTE_KIND
+ && to_kind == PyUnicode_1BYTE_KIND)
+ {
+ _PyUnicode_CONVERT_BYTES(
+ Py_UCS2, Py_UCS1,
+ PyUnicode_2BYTE_DATA(from) + from_start,
+ PyUnicode_2BYTE_DATA(from) + from_start + how_many,
+ PyUnicode_1BYTE_DATA(to) + to_start
+ );
+ }
+ else if (from_kind == PyUnicode_4BYTE_KIND
+ && to_kind == PyUnicode_1BYTE_KIND)
+ {
+ _PyUnicode_CONVERT_BYTES(
+ Py_UCS4, Py_UCS1,
+ PyUnicode_4BYTE_DATA(from) + from_start,
+ PyUnicode_4BYTE_DATA(from) + from_start + how_many,
+ PyUnicode_1BYTE_DATA(to) + to_start
+ );
+ }
+ else if (from_kind == PyUnicode_4BYTE_KIND
+ && to_kind == PyUnicode_2BYTE_KIND)
+ {
+ _PyUnicode_CONVERT_BYTES(
+ Py_UCS4, Py_UCS2,
+ PyUnicode_4BYTE_DATA(from) + from_start,
+ PyUnicode_4BYTE_DATA(from) + from_start + how_many,
+ PyUnicode_2BYTE_DATA(to) + to_start
+ );
+ }
+ else {
+ assert(0);
+ return -1;
+ }
+ }
+ else
+#endif
{
- /* slow path to check for character overflow */
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch;
Py_ssize_t i;
-#ifdef Py_DEBUG
for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
+#ifndef Py_DEBUG
assert(ch <= to_maxchar);
- PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
- }
#else
- if (!check_maxchar) {
- for (i=0; i < how_many; i++) {
- ch = PyUnicode_READ(from_kind, from_data, from_start + i);
- PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
- }
- }
- else {
- for (i=0; i < how_many; i++) {
- ch = PyUnicode_READ(from_kind, from_data, from_start + i);
- if (ch > to_maxchar)
- return 1;
- PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
- }
- }
+ if (ch > to_maxchar)
+ return -1;
#endif
- }
- else {
- assert(0 && "inconsistent state");
- return 1;
+ PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
+ }
}
}
return 0;
@@ -13876,9 +13894,11 @@ PyUnicode_Format(PyObject *format, PyObject *args)
}
}
- _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
- temp, pindex, len);
- writer.pos += len;
+ if (len) {
+ _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
+ temp, pindex, len);
+ writer.pos += len;
+ }
if (width > len) {
sublen = width - len;
FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index fdb587d..cd66670 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -786,8 +786,10 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format,
goto done;
/* Then the source string. */
- _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
- value, 0, len);
+ if (len) {
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ value, 0, len);
+ }
writer->pos += (len + rpad);
result = 0;