diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2013-04-09 19:53:09 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2013-04-09 19:53:09 (GMT) |
commit | 03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21 (patch) | |
tree | 6c3edc03a6edfbe04c6f1c7733f3a8882d1da8c8 /Objects | |
parent | 0f344b6e0526245249b80219e6001616307d2b35 (diff) | |
download | cpython-03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21.zip cpython-03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21.tar.gz cpython-03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21.tar.bz2 |
Add fast-path in PyUnicode_DecodeCharmap() for pure 8 bit encodings:
cp037, cp500 and iso8859_1 codecs
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 27 |
1 files changed, 26 insertions, 1 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e9153c0..88729c8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7281,6 +7281,7 @@ PyUnicode_DecodeCharmap(const char *s, enum PyUnicode_Kind mapkind; void *mapdata; Py_UCS4 x; + unsigned char ch; if (PyUnicode_READY(mapping) == -1) return NULL; @@ -7288,8 +7289,32 @@ PyUnicode_DecodeCharmap(const char *s, maplen = PyUnicode_GET_LENGTH(mapping); mapdata = PyUnicode_DATA(mapping); mapkind = PyUnicode_KIND(mapping); + + if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) { + /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1 + * is disabled in encoding aliases, latin1 is preferred because + * its implementation is faster. */ + Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata; + Py_UCS1 *outdata = (Py_UCS1 *)writer.data; + Py_UCS4 maxchar = writer.maxchar; + + assert (writer.kind == PyUnicode_1BYTE_KIND); + while (s < e) { + ch = *s; + x = mapdata_ucs1[ch]; + if (x > maxchar) { + if (_PyUnicodeWriter_PrepareInternal(&writer, 1, 0xff) == -1) + goto onError; + maxchar = writer.maxchar; + outdata = (Py_UCS1 *)writer.data; + } + outdata[writer.pos] = x; + writer.pos++; + ++s; + } + } + while (s < e) { - unsigned char ch; if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { enum PyUnicode_Kind outkind = writer.kind; void *outdata = writer.data; |