summary | refs | log | tree | commit | diff | stats
path: root/Modules/_io
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2011-11-13 02:53:42 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2011-11-13 02:53:42 (GMT)
commit: c28e2e53ba1dd527ece0260785ed9bf7c5c38799 (patch)
tree: ecbab92ee2b81b44dc506ac55d253eb6f6874bb4 /Modules/_io
parent: f364e7b59899fa9425bb0b281a872176facd7914 (diff)
download: cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.zip
          cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.gz
          cpython-c28e2e53ba1dd527ece0260785ed9bf7c5c38799.tar.bz2
In text I/O, optimize scanning for new lines with 1-byte unicode chars
Diffstat (limited to 'Modules/_io')
-rw-r--r-- Modules/_io/textio.c | 32
1 file changed, 20 insertions, 12 deletions
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index eef99da..590a9e6 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -365,19 +365,23 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
*/
if (seennl == 0 &&
memchr(in_str, '\n', kind * len) != NULL) {
- Py_ssize_t i = 0;
- for (;;) {
- Py_UCS4 c;
- /* Fast loop for non-control characters */
- while (PyUnicode_READ(kind, in_str, i) > '\n')
- i++;
- c = PyUnicode_READ(kind, in_str, i++);
- if (c == '\n') {
- seennl |= SEEN_LF;
- break;
+ if (kind == PyUnicode_1BYTE_KIND)
+ seennl |= SEEN_LF;
+ else {
+ Py_ssize_t i = 0;
+ for (;;) {
+ Py_UCS4 c;
+ /* Fast loop for non-control characters */
+ while (PyUnicode_READ(kind, in_str, i) > '\n')
+ i++;
+ c = PyUnicode_READ(kind, in_str, i++);
+ if (c == '\n') {
+ seennl |= SEEN_LF;
+ break;
+ }
+ if (i >= len)
+ break;
}
- if (i >= len)
- break;
}
}
/* Finished: we have scanned for newlines, and none of them
@@ -1597,6 +1601,10 @@ textiowrapper_read(textio *self, PyObject *args)
static char *
find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
{
+ if (kind == PyUnicode_1BYTE_KIND) {
+ assert(ch < 256);
+ return (char *) memchr((void *) s, (char) ch, end - s);
+ }
for (;;) {
while (PyUnicode_READ(kind, s, 0) > ch)
s += kind;