diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2014-04-05 12:27:07 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2014-04-05 12:27:07 (GMT) |
commit | 872b291b9605f0f5a072137182034959416e36bd (patch) | |
tree | 496b410a8cdbc3b378e8439ca5a2b2a10f72ae7e /Objects | |
parent | 4ff33af2574aa09bc8926c0deb116c886cb745d1 (diff) | |
download | cpython-872b291b9605f0f5a072137182034959416e36bd.zip cpython-872b291b9605f0f5a072137182034959416e36bd.tar.gz cpython-872b291b9605f0f5a072137182034959416e36bd.tar.bz2 |
Issue #21118: Optimize also str.translate() for ASCII => ASCII deletion
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 48 |
1 file changed, 29 insertions, 19 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2183773..11f2011 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8561,7 +8561,8 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
 
     if (item == Py_None) {
         /* deletion: skip fast translate */
-        goto exit;
+        translate[ch] = 0xfe;
+        return 1;
     }
 
     if (item == NULL) {
@@ -8614,12 +8615,12 @@ exit:
    translated into writer, raise an exception and return -1 on error. */
 static int
 unicode_fast_translate(PyObject *input, PyObject *mapping,
-                       _PyUnicodeWriter *writer)
+                       _PyUnicodeWriter *writer, int ignore)
 {
-    Py_UCS1 translate[128], ch, ch2;
+    Py_UCS1 ascii_table[128], ch, ch2;
     Py_ssize_t len;
     Py_UCS1 *in, *end, *out;
-    int res;
+    int res = 0;
 
     if (PyUnicode_READY(input) == -1)
         return -1;
@@ -8627,7 +8628,7 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
         return 0;
     len = PyUnicode_GET_LENGTH(input);
 
-    memset(translate, 0xff, 128);
+    memset(ascii_table, 0xff, 128);
 
     in = PyUnicode_1BYTE_DATA(input);
     end = in + len;
@@ -8636,23 +8637,32 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
     assert(PyUnicode_GET_LENGTH(writer->buffer) == len);
     out = PyUnicode_1BYTE_DATA(writer->buffer);
 
-    for (; in < end; in++, out++) {
+    for (; in < end; in++) {
         ch = *in;
-        ch2 = translate[ch];
+        ch2 = ascii_table[ch];
         if (ch2 == 0xff) {
-            res = unicode_fast_translate_lookup(mapping, ch, translate);
-            if (res < 0)
+            int translate = unicode_fast_translate_lookup(mapping, ch,
+                                                          ascii_table);
+            if (translate < 0)
                 return -1;
-            if (res == 0) {
-                writer->pos = in - PyUnicode_1BYTE_DATA(input);
-                return 0;
-            }
-            ch2 = translate[ch];
+            if (translate == 0)
+                goto exit;
+            ch2 = ascii_table[ch];
+        }
+        if (ch2 == 0xfe) {
+            if (ignore)
+                continue;
+            goto exit;
         }
+        assert(ch2 < 128);
         *out = ch2;
+        out++;
     }
-    writer->pos = len;
-    return 1;
+    res = 1;
+
+exit:
+    writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
+    return res;
 }
 
 PyObject *
@@ -8695,7 +8705,9 @@ _PyUnicode_TranslateCharmap(PyObject *input,
     if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
         goto onError;
 
-    res = unicode_fast_translate(input, mapping, &writer);
+    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
+
+    res = unicode_fast_translate(input, mapping, &writer, ignore);
     if (res < 0) {
         _PyUnicodeWriter_Dealloc(&writer);
         return NULL;
@@ -8703,8 +8715,6 @@ _PyUnicode_TranslateCharmap(PyObject *input,
     if (res == 1)
         return _PyUnicodeWriter_Finish(&writer);
 
-    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
-
     i = writer.pos;
     while (i<size) {
         /* try to encode it */
```