diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2009-03-06 23:40:56 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2009-03-06 23:40:56 (GMT) |
commit | 66913e221312e38cc542896d4db9b45720a20672 (patch) | |
tree | 2260ead42bedd1112e956abf5728e4de7edb18f1 /Modules/_textio.c | |
parent | 2db74c2412a785a3b796074f54a7842b8f8f3b9d (diff) | |
download | cpython-66913e221312e38cc542896d4db9b45720a20672.zip cpython-66913e221312e38cc542896d4db9b45720a20672.tar.gz cpython-66913e221312e38cc542896d4db9b45720a20672.tar.bz2 |
Issue #5433: Excessive newline detection optimization in IncrementalNewlineDecoder
Diffstat (limited to 'Modules/_textio.c')
-rw-r--r-- | Modules/_textio.c | 42 |
1 file changed, 30 insertions, 12 deletions
```diff
diff --git a/Modules/_textio.c b/Modules/_textio.c
index 145f8ea..dbed2fd 100644
--- a/Modules/_textio.c
+++ b/Modules/_textio.c
@@ -305,22 +305,40 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
            for the \r *byte* with the libc's optimized memchr.
            */
         if (seennl == SEEN_LF || seennl == 0) {
-            int has_cr, has_lf;
-            has_lf = (seennl == SEEN_LF) ||
-                    (memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
-            has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
-            if (has_lf && !has_cr) {
-                only_lf = 1;
-                seennl = SEEN_LF;
-            }
+            only_lf = !(memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
         }
 
-        if (!self->translate) {
+        if (only_lf) {
+            /* If not already seen, quick scan for a possible "\n" character.
+               (there's nothing else to be done, even when in translation mode)
+            */
+            if (seennl == 0 &&
+                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
+                Py_UNICODE *s, *end;
+                s = in_str;
+                end = in_str + len;
+                for (;;) {
+                    Py_UNICODE c;
+                    /* Fast loop for non-control characters */
+                    while (*s > '\n')
+                        s++;
+                    c = *s++;
+                    if (c == '\n') {
+                        seennl |= SEEN_LF;
+                        break;
+                    }
+                    if (s > end)
+                        break;
+                }
+            }
+            /* Finished: we have scanned for newlines, and none of them
+               need translating */
+        }
+        else if (!self->translate) {
             Py_UNICODE *s, *end;
+            /* We have already seen all newline types, no need to scan again */
             if (seennl == SEEN_ALL)
                 goto endscan;
-            if (only_lf)
-                goto endscan;
             s = in_str;
             end = in_str + len;
             for (;;) {
@@ -347,7 +365,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
         endscan:
             ;
         }
-        else if (!only_lf) {
+        else {
             PyObject *translated = NULL;
             Py_UNICODE *out_str;
             Py_UNICODE *in, *out, *end;
```