diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2011-11-13 02:53:42 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2011-11-13 02:53:42 (GMT) |
commit | c28e2e53ba1dd527ece0260785ed9bf7c5c38799 (patch) | |
tree | ecbab92ee2b81b44dc506ac55d253eb6f6874bb4 /Modules/_io | |
parent | f364e7b59899fa9425bb0b281a872176facd7914 (diff) | |
download | cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.zip cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.gz cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.bz2 |
In text I/O, optimize scanning for new lines with 1-byte unicode chars
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/textio.c | 32 |
1 files changed, 20 insertions, 12 deletions
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index eef99da..590a9e6 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -365,19 +365,23 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
          */
         if (seennl == 0 &&
             memchr(in_str, '\n', kind * len) != NULL) {
-            Py_ssize_t i = 0;
-            for (;;) {
-                Py_UCS4 c;
-                /* Fast loop for non-control characters */
-                while (PyUnicode_READ(kind, in_str, i) > '\n')
-                    i++;
-                c = PyUnicode_READ(kind, in_str, i++);
-                if (c == '\n') {
-                    seennl |= SEEN_LF;
-                    break;
+            if (kind == PyUnicode_1BYTE_KIND)
+                seennl |= SEEN_LF;
+            else {
+                Py_ssize_t i = 0;
+                for (;;) {
+                    Py_UCS4 c;
+                    /* Fast loop for non-control characters */
+                    while (PyUnicode_READ(kind, in_str, i) > '\n')
+                        i++;
+                    c = PyUnicode_READ(kind, in_str, i++);
+                    if (c == '\n') {
+                        seennl |= SEEN_LF;
+                        break;
+                    }
+                    if (i >= len)
+                        break;
                 }
-                if (i >= len)
-                    break;
             }
         }
         /* Finished: we have scanned for newlines, and none of them
@@ -1597,6 +1601,10 @@ textiowrapper_read(textio *self, PyObject *args)
 static char *
 find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
 {
+    if (kind == PyUnicode_1BYTE_KIND) {
+        assert(ch < 256);
+        return (char *) memchr((void *) s, (char) ch, end - s);
+    }
     for (;;) {
         while (PyUnicode_READ(kind, s, 0) > ch)
             s += kind;