summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2014-04-05 12:27:07 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2014-04-05 12:27:07 (GMT)
commit: 872b291b9605f0f5a072137182034959416e36bd (patch)
tree: 496b410a8cdbc3b378e8439ca5a2b2a10f72ae7e /Objects
parent: 4ff33af2574aa09bc8926c0deb116c886cb745d1 (diff)
download: cpython-872b291b9605f0f5a072137182034959416e36bd.zip
cpython-872b291b9605f0f5a072137182034959416e36bd.tar.gz
cpython-872b291b9605f0f5a072137182034959416e36bd.tar.bz2
Issue #21118: Optimize also str.translate() for ASCII => ASCII deletion
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 48
1 files changed, 29 insertions, 19 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2183773..11f2011 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8561,7 +8561,8 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
if (item == Py_None) {
/* deletion: skip fast translate */
- goto exit;
+ translate[ch] = 0xfe;
+ return 1;
}
if (item == NULL) {
@@ -8614,12 +8615,12 @@ exit:
translated into writer, raise an exception and return -1 on error. */
static int
unicode_fast_translate(PyObject *input, PyObject *mapping,
- _PyUnicodeWriter *writer)
+ _PyUnicodeWriter *writer, int ignore)
{
- Py_UCS1 translate[128], ch, ch2;
+ Py_UCS1 ascii_table[128], ch, ch2;
Py_ssize_t len;
Py_UCS1 *in, *end, *out;
- int res;
+ int res = 0;
if (PyUnicode_READY(input) == -1)
return -1;
@@ -8627,7 +8628,7 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
return 0;
len = PyUnicode_GET_LENGTH(input);
- memset(translate, 0xff, 128);
+ memset(ascii_table, 0xff, 128);
in = PyUnicode_1BYTE_DATA(input);
end = in + len;
@@ -8636,23 +8637,32 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
assert(PyUnicode_GET_LENGTH(writer->buffer) == len);
out = PyUnicode_1BYTE_DATA(writer->buffer);
- for (; in < end; in++, out++) {
+ for (; in < end; in++) {
ch = *in;
- ch2 = translate[ch];
+ ch2 = ascii_table[ch];
if (ch2 == 0xff) {
- res = unicode_fast_translate_lookup(mapping, ch, translate);
- if (res < 0)
+ int translate = unicode_fast_translate_lookup(mapping, ch,
+ ascii_table);
+ if (translate < 0)
return -1;
- if (res == 0) {
- writer->pos = in - PyUnicode_1BYTE_DATA(input);
- return 0;
- }
- ch2 = translate[ch];
+ if (translate == 0)
+ goto exit;
+ ch2 = ascii_table[ch];
+ }
+ if (ch2 == 0xfe) {
+ if (ignore)
+ continue;
+ goto exit;
}
+ assert(ch2 < 128);
*out = ch2;
+ out++;
}
- writer->pos = len;
- return 1;
+ res = 1;
+
+exit:
+ writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
+ return res;
}
PyObject *
@@ -8695,7 +8705,9 @@ _PyUnicode_TranslateCharmap(PyObject *input,
if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
goto onError;
- res = unicode_fast_translate(input, mapping, &writer);
+ ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
+
+ res = unicode_fast_translate(input, mapping, &writer, ignore);
if (res < 0) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
@@ -8703,8 +8715,6 @@ _PyUnicode_TranslateCharmap(PyObject *input,
if (res == 1)
return _PyUnicodeWriter_Finish(&writer);
- ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
-
i = writer.pos;
while (i<size) {
/* try to encode it */