summary refs log tree commit diff stats
path: root/Modules
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2009-03-06 23:40:56 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2009-03-06 23:40:56 (GMT)
commit: 66913e221312e38cc542896d4db9b45720a20672 (patch)
tree: 2260ead42bedd1112e956abf5728e4de7edb18f1 /Modules
parent: 2db74c2412a785a3b796074f54a7842b8f8f3b9d (diff)
download: cpython-66913e221312e38cc542896d4db9b45720a20672.zip
cpython-66913e221312e38cc542896d4db9b45720a20672.tar.gz
cpython-66913e221312e38cc542896d4db9b45720a20672.tar.bz2
Issue #5433: Excessive newline detection optimization in IncrementalNewlineDecoder
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_textio.c 42
1 files changed, 30 insertions, 12 deletions
diff --git a/Modules/_textio.c b/Modules/_textio.c
index 145f8ea..dbed2fd 100644
--- a/Modules/_textio.c
+++ b/Modules/_textio.c
@@ -305,22 +305,40 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
for the \r *byte* with the libc's optimized memchr.
*/
if (seennl == SEEN_LF || seennl == 0) {
- int has_cr, has_lf;
- has_lf = (seennl == SEEN_LF) ||
- (memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
- has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
- if (has_lf && !has_cr) {
- only_lf = 1;
- seennl = SEEN_LF;
- }
+ only_lf = !(memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
}
- if (!self->translate) {
+ if (only_lf) {
+ /* If not already seen, quick scan for a possible "\n" character.
+ (there's nothing else to be done, even when in translation mode)
+ */
+ if (seennl == 0 &&
+ memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
+ Py_UNICODE *s, *end;
+ s = in_str;
+ end = in_str + len;
+ for (;;) {
+ Py_UNICODE c;
+ /* Fast loop for non-control characters */
+ while (*s > '\n')
+ s++;
+ c = *s++;
+ if (c == '\n') {
+ seennl |= SEEN_LF;
+ break;
+ }
+ if (s > end)
+ break;
+ }
+ }
+ /* Finished: we have scanned for newlines, and none of them
+ need translating */
+ }
+ else if (!self->translate) {
Py_UNICODE *s, *end;
+ /* We have already seen all newline types, no need to scan again */
if (seennl == SEEN_ALL)
goto endscan;
- if (only_lf)
- goto endscan;
s = in_str;
end = in_str + len;
for (;;) {
@@ -347,7 +365,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
endscan:
;
}
- else if (!only_lf) {
+ else {
PyObject *translated = NULL;
Py_UNICODE *out_str;
Py_UNICODE *in, *out, *end;