In text I/O, optimize scanning for newlines in strings of 1-byte Unicode characters

author: Antoine Pitrou <solipsis@pitrou.net> 2011-11-13 02:53:42 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2011-11-13 02:53:42 (GMT)
commit: c28e2e53ba1dd527ece0260785ed9bf7c5c38799 (patch)
tree: ecbab92ee2b81b44dc506ac55d253eb6f6874bb4 /Modules/_io
parent: f364e7b59899fa9425bb0b281a872176facd7914 (diff)
download: cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.zip
cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.gz
cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.bz2
1 file changed, 20 insertions, 12 deletions
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index eef99da..590a9e6 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -365,19 +365,23 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
             */
             if (seennl == 0 &&
                 memchr(in_str, '\n', kind * len) != NULL) {
-                Py_ssize_t i = 0;
-                for (;;) {
-                    Py_UCS4 c;
-                    /* Fast loop for non-control characters */
-                    while (PyUnicode_READ(kind, in_str, i) > '\n')
-                        i++;
-                    c = PyUnicode_READ(kind, in_str, i++);
-                    if (c == '\n') {
-                        seennl |= SEEN_LF;
-                        break;
+                if (kind == PyUnicode_1BYTE_KIND)
+                    seennl |= SEEN_LF;
+                else {
+                    Py_ssize_t i = 0;
+                    for (;;) {
+                        Py_UCS4 c;
+                        /* Fast loop for non-control characters */
+                        while (PyUnicode_READ(kind, in_str, i) > '\n')
+                            i++;
+                        c = PyUnicode_READ(kind, in_str, i++);
+                        if (c == '\n') {
+                            seennl |= SEEN_LF;
+                            break;
+                        }
+                        if (i >= len)
+                            break;
                     }
-                    if (i >= len)
-                        break;
                 }
             }
             /* Finished: we have scanned for newlines, and none of them
@@ -1597,6 +1601,10 @@ textiowrapper_read(textio *self, PyObject *args)
 static char *
 find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
 {
+    if (kind == PyUnicode_1BYTE_KIND) {
+        assert(ch < 256);
+        return (char *) memchr((void *) s, (char) ch, end - s);
+    }
     for (;;) {
         while (PyUnicode_READ(kind, s, 0) > ch)
             s += kind;
author	Antoine Pitrou <solipsis@pitrou.net>	2011-11-13 02:53:42 (GMT)
committer	Antoine Pitrou <solipsis@pitrou.net>	2011-11-13 02:53:42 (GMT)
commit	c28e2e53ba1dd527ece0260785ed9bf7c5c38799 (patch)
tree	ecbab92ee2b81b44dc506ac55d253eb6f6874bb4 /Modules/_io
parent	f364e7b59899fa9425bb0b281a872176facd7914 (diff)
download	cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.zip cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.gz cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.bz2