summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2013-04-09 19:53:09 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2013-04-09 19:53:09 (GMT)
commit: 03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21 (patch)
tree: 6c3edc03a6edfbe04c6f1c7733f3a8882d1da8c8 /Objects
parent: 0f344b6e0526245249b80219e6001616307d2b35 (diff)
download: cpython-03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21.zip
cpython-03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21.tar.gz
cpython-03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21.tar.bz2
Add a fast path in PyUnicode_DecodeCharmap() for pure 8-bit encodings:
the cp037, cp500 and iso8859_1 codecs.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 27
1 file changed, 26 insertions, 1 deletion
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e9153c0..88729c8 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7281,6 +7281,7 @@ PyUnicode_DecodeCharmap(const char *s,
enum PyUnicode_Kind mapkind;
void *mapdata;
Py_UCS4 x;
+ unsigned char ch;
if (PyUnicode_READY(mapping) == -1)
return NULL;
@@ -7288,8 +7289,32 @@ PyUnicode_DecodeCharmap(const char *s,
maplen = PyUnicode_GET_LENGTH(mapping);
mapdata = PyUnicode_DATA(mapping);
mapkind = PyUnicode_KIND(mapping);
+
+ if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) {
+ /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
+ * is disabled in encoding aliases, latin1 is preferred because
+ * its implementation is faster. */
+ Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata;
+ Py_UCS1 *outdata = (Py_UCS1 *)writer.data;
+ Py_UCS4 maxchar = writer.maxchar;
+
+ assert (writer.kind == PyUnicode_1BYTE_KIND);
+ while (s < e) {
+ ch = *s;
+ x = mapdata_ucs1[ch];
+ if (x > maxchar) {
+ if (_PyUnicodeWriter_PrepareInternal(&writer, 1, 0xff) == -1)
+ goto onError;
+ maxchar = writer.maxchar;
+ outdata = (Py_UCS1 *)writer.data;
+ }
+ outdata[writer.pos] = x;
+ writer.pos++;
+ ++s;
+ }
+ }
+
while (s < e) {
- unsigned char ch;
if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
enum PyUnicode_Kind outkind = writer.kind;
void *outdata = writer.data;