Issue #5433: Excessive newline detection optimization in IncrementalNewlineDecoder

author: Antoine Pitrou <solipsis@pitrou.net> 2009-03-06 23:40:56 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2009-03-06 23:40:56 (GMT)
commit: 66913e221312e38cc542896d4db9b45720a20672 (patch)
tree: 2260ead42bedd1112e956abf5728e4de7edb18f1 /Modules
parent: 2db74c2412a785a3b796074f54a7842b8f8f3b9d (diff)
download: cpython-66913e221312e38cc542896d4db9b45720a20672.zip cpython-66913e221312e38cc542896d4db9b45720a20672.tar.gz cpython-66913e221312e38cc542896d4db9b45720a20672.tar.bz2
1 file changed, 30 insertions, 12 deletions
diff --git a/Modules/_textio.c b/Modules/_textio.c
index 145f8ea..dbed2fd 100644
--- a/Modules/_textio.c
+++ b/Modules/_textio.c
@@ -305,22 +305,40 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
            for the \r *byte* with the libc's optimized memchr.
            */
         if (seennl == SEEN_LF || seennl == 0) {
-            int has_cr, has_lf;
-            has_lf = (seennl == SEEN_LF) ||
-                    (memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
-            has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
-            if (has_lf && !has_cr) {
-                only_lf = 1;
-                seennl = SEEN_LF;
-            }
+            only_lf = !(memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
         }
 
-        if (!self->translate) {
+        if (only_lf) {
+            /* If not already seen, quick scan for a possible "\n" character.
+               (there's nothing else to be done, even when in translation mode)
+            */
+            if (seennl == 0 &&
+                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
+                Py_UNICODE *s, *end;
+                s = in_str;
+                end = in_str + len;
+                for (;;) {
+                    Py_UNICODE c;
+                    /* Fast loop for non-control characters */
+                    while (*s > '\n')
+                        s++;
+                    c = *s++;
+                    if (c == '\n') {
+                        seennl |= SEEN_LF;
+                        break;
+                    }
+                    if (s > end)
+                        break;
+                }
+            }
+            /* Finished: we have scanned for newlines, and none of them
+               need translating */
+        }
+        else if (!self->translate) {
             Py_UNICODE *s, *end;
+            /* We have already seen all newline types, no need to scan again */
             if (seennl == SEEN_ALL)
                 goto endscan;
-            if (only_lf)
-                goto endscan;
             s = in_str;
             end = in_str + len;
             for (;;) {
@@ -347,7 +365,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
         endscan:
             ;
         }
-        else if (!only_lf) {
+        else {
             PyObject *translated = NULL;
             Py_UNICODE *out_str;
             Py_UNICODE *in, *out, *end;
author	Antoine Pitrou <solipsis@pitrou.net>	2009-03-06 23:40:56 (GMT)
committer	Antoine Pitrou <solipsis@pitrou.net>	2009-03-06 23:40:56 (GMT)
commit	66913e221312e38cc542896d4db9b45720a20672 (patch)
tree	2260ead42bedd1112e956abf5728e4de7edb18f1 /Modules
parent	2db74c2412a785a3b796074f54a7842b8f8f3b9d (diff)
download	cpython-66913e221312e38cc542896d4db9b45720a20672.zip cpython-66913e221312e38cc542896d4db9b45720a20672.tar.gz cpython-66913e221312e38cc542896d4db9b45720a20672.tar.bz2