diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2011-09-28 05:41:54 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2011-09-28 05:41:54 (GMT) |
commit | d63a3b8beb4a0841cb59fb3515347ccaab34b733 (patch) | |
tree | 3b4e3cc63151c5a5a910c3550a190aefaea96ad4 /Modules/_io | |
parent | 48d49497c50e79d14e9df9527d766ca3a0a38be5 (diff) | |
download | cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.zip cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.gz cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.bz2 |
Implement PEP 393.
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/_iomodule.h | 2 | ||||
-rw-r--r-- | Modules/_io/stringio.c | 69 | ||||
-rw-r--r-- | Modules/_io/textio.c | 352 |
3 files changed, 213 insertions, 210 deletions
diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 9174bdd..4e97dd1 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -55,7 +55,7 @@ extern PyObject *_PyIncrementalNewlineDecoder_decode( Otherwise, the function will scan further and return garbage. */ extern Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, - Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed); + int kind, char *start, char *end, Py_ssize_t *consumed); #define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index c9d14b1..c40163f 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -9,7 +9,7 @@ typedef struct { PyObject_HEAD - Py_UNICODE *buf; + Py_UCS4 *buf; Py_ssize_t pos; Py_ssize_t string_size; size_t buf_size; @@ -21,7 +21,7 @@ typedef struct { PyObject *decoder; PyObject *readnl; PyObject *writenl; - + PyObject *dict; PyObject *weakreflist; } stringio; @@ -56,7 +56,7 @@ resize_buffer(stringio *self, size_t size) /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ size_t alloc = self->buf_size; - Py_UNICODE *new_buf = NULL; + Py_UCS4 *new_buf = NULL; assert(self->buf != NULL); @@ -84,10 +84,9 @@ resize_buffer(stringio *self, size_t size) alloc = size + 1; } - if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) + if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4)) goto overflow; - new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, - alloc * sizeof(Py_UNICODE)); + new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4)); if (new_buf == NULL) { PyErr_NoMemory(); return -1; @@ -108,9 +107,9 @@ resize_buffer(stringio *self, size_t size) static Py_ssize_t write_str(stringio *self, PyObject *obj) { - Py_UNICODE *str; Py_ssize_t len; PyObject *decoded = NULL; + assert(self->buf != NULL); assert(self->pos >= 0); @@ -132,8 +131,7 @@ write_str(stringio *self, PyObject *obj) return -1; assert(PyUnicode_Check(decoded)); - str = PyUnicode_AS_UNICODE(decoded); - len = PyUnicode_GET_SIZE(decoded); + len = PyUnicode_GET_LENGTH(decoded); assert(len >= 0); @@ -161,18 +159,21 @@ write_str(stringio *self, PyObject *obj) */ memset(self->buf + self->string_size, '\0', - (self->pos - self->string_size) * sizeof(Py_UNICODE)); + (self->pos - self->string_size) * sizeof(Py_UCS4)); } /* Copy the data to the internal buffer, overwriting some of the existing data if self->pos < self->string_size. */ - memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); - self->pos += len; + if (!PyUnicode_AsUCS4(decoded, + self->buf + self->pos, + self->buf_size - self->pos, + 0)) + goto fail; /* Set the new length of the internal string if it has changed. */ - if (self->string_size < self->pos) { + self->pos += len; + if (self->string_size < self->pos) self->string_size = self->pos; - } Py_DECREF(decoded); return 0; @@ -190,7 +191,8 @@ stringio_getvalue(stringio *self) { CHECK_INITIALIZED(self); CHECK_CLOSED(self); - return PyUnicode_FromUnicode(self->buf, self->string_size); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf, + self->string_size); } PyDoc_STRVAR(stringio_tell_doc, @@ -214,7 +216,7 @@ static PyObject * stringio_read(stringio *self, PyObject *args) { Py_ssize_t size, n; - Py_UNICODE *output; + Py_UCS4 *output; PyObject *arg = Py_None; CHECK_INITIALIZED(self); @@ -247,19 +249,19 @@ stringio_read(stringio *self, PyObject *args) output = self->buf + self->pos; self->pos += size; - return PyUnicode_FromUnicode(output, size); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size); } /* Internal helper, used by stringio_readline and stringio_iternext */ static PyObject * _stringio_readline(stringio *self, Py_ssize_t limit) { - Py_UNICODE *start, *end, old_char; + Py_UCS4 *start, *end, old_char; Py_ssize_t len, consumed; /* In case of overseek, return the empty string */ if (self->pos >= self->string_size) - return PyUnicode_FromString(""); + return PyUnicode_New(0, 0); start = self->buf + self->pos; if (limit < 0 || limit > self->string_size - self->pos) @@ -270,14 +272,14 @@ _stringio_readline(stringio *self, Py_ssize_t limit) *end = '\0'; len = _PyIO_find_line_ending( self->readtranslate, self->readuniversal, self->readnl, - start, end, &consumed); + PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed); *end = old_char; /* If we haven't found any line ending, we just return everything (`consumed` is ignored). */ if (len < 0) len = limit; self->pos += len; - return PyUnicode_FromUnicode(start, len); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len); } PyDoc_STRVAR(stringio_readline_doc, @@ -462,8 +464,10 @@ stringio_write(stringio *self, PyObject *obj) Py_TYPE(obj)->tp_name); return NULL; } + if (PyUnicode_READY(obj)) + return NULL; CHECK_CLOSED(self); - size = PyUnicode_GET_SIZE(obj); + size = PyUnicode_GET_LENGTH(obj); if (size > 0 && write_str(self, obj) < 0) return NULL; @@ -535,7 +539,7 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* tp_alloc initializes all the fields to zero. So we don't have to initialize them here. */ - self->buf = (Py_UNICODE *)PyMem_Malloc(0); + self->buf = (Py_UCS4 *)PyMem_Malloc(0); if (self->buf == NULL) { Py_DECREF(self); return PyErr_NoMemory(); @@ -747,11 +751,22 @@ stringio_setstate(stringio *self, PyObject *state) once by __init__. So we do not take any chance and replace object's buffer completely. */ { - Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0)); - Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0)); - if (resize_buffer(self, bufsize) < 0) + PyObject *item; + Py_UCS4 *buf; + Py_ssize_t bufsize; + + item = PyTuple_GET_ITEM(state, 0); + buf = PyUnicode_AsUCS4Copy(item); + if (buf == NULL) return NULL; - memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE)); + bufsize = PyUnicode_GET_LENGTH(item); + + if (resize_buffer(self, bufsize) < 0) { + PyMem_Free(buf); + return NULL; + } + memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); + PyMem_Free(buf); self->string_size = bufsize; } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 13d4bd9..9c06ec8 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -274,18 +274,28 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, goto error; } - output_len = PyUnicode_GET_SIZE(output); + if (PyUnicode_READY(output) == -1) + goto error; + + output_len = PyUnicode_GET_LENGTH(output); if (self->pendingcr && (final || output_len > 0)) { - Py_UNICODE *out; - PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1); + /* Prefix output with CR */ + int kind; + PyObject *modified; + char *out; + + modified = PyUnicode_New(output_len + 1, + PyUnicode_MAX_CHAR_VALUE(output)); if (modified == NULL) goto error; - out = PyUnicode_AS_UNICODE(modified); - out[0] = '\r'; - memcpy(out + 1, PyUnicode_AS_UNICODE(output), - output_len * sizeof(Py_UNICODE)); + kind = PyUnicode_KIND(modified); + out = PyUnicode_DATA(modified); + PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r'); + memcpy(out + PyUnicode_KIND_SIZE(kind, 1), + PyUnicode_DATA(output), + PyUnicode_KIND_SIZE(kind, output_len)); Py_DECREF(output); - output = modified; + output = modified; /* output remains ready */ self->pendingcr = 0; output_len++; } @@ -295,21 +305,13 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, */ if (!final) { if (output_len > 0 - && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') { - - if (Py_REFCNT(output) == 1) { - if (PyUnicode_Resize(&output, output_len - 1) < 0) - goto error; - } - else { - PyObject *modified = PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(output), - output_len - 1); - if (modified == NULL) - goto error; - Py_DECREF(output); - output = modified; - } + && PyUnicode_READ_CHAR(output, output_len - 1) == '\r') + { + PyObject *modified = PyUnicode_Substring(output, 0, output_len -1); + if (modified == NULL) + goto error; + Py_DECREF(output); + output = modified; self->pendingcr = 1; } } @@ -317,13 +319,15 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, /* Record which newlines are read and do newline translation if desired, all in one pass. */ { - Py_UNICODE *in_str; + void *in_str; Py_ssize_t len; int seennl = self->seennl; int only_lf = 0; + int kind; - in_str = PyUnicode_AS_UNICODE(output); - len = PyUnicode_GET_SIZE(output); + in_str = PyUnicode_DATA(output); + len = PyUnicode_GET_LENGTH(output); + kind = PyUnicode_KIND(output); if (len == 0) return output; @@ -332,7 +336,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, for the \r *byte* with the libc's optimized memchr. */ if (seennl == SEEN_LF || seennl == 0) { - only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL); + only_lf = (memchr(in_str, '\r', PyUnicode_KIND_SIZE(kind, len)) == NULL); } if (only_lf) { @@ -340,21 +344,19 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, (there's nothing else to be done, even when in translation mode) */ if (seennl == 0 && - memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) { - Py_UNICODE *s, *end; - s = in_str; - end = in_str + len; + memchr(in_str, '\n', PyUnicode_KIND_SIZE(kind, len)) != NULL) { + Py_ssize_t i = 0; for (;;) { Py_UNICODE c; /* Fast loop for non-control characters */ - while (*s > '\n') - s++; - c = *s++; + while (PyUnicode_READ(kind, in_str, i) > '\n') + i++; + c = PyUnicode_READ(kind, in_str, i++); if (c == '\n') { seennl |= SEEN_LF; break; } - if (s > end) + if (i >= len) break; } } @@ -362,29 +364,27 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, need translating */ } else if (!self->translate) { - Py_UNICODE *s, *end; + Py_ssize_t i = 0; /* We have already seen all newline types, no need to scan again */ if (seennl == SEEN_ALL) goto endscan; - s = in_str; - end = in_str + len; for (;;) { - Py_UNICODE c; + Py_UCS4 c; /* Fast loop for non-control characters */ - while (*s > '\r') - s++; - c = *s++; + while (PyUnicode_READ(kind, in_str, i) > '\r') + i++; + c = PyUnicode_READ(kind, in_str, i++); if (c == '\n') seennl |= SEEN_LF; else if (c == '\r') { - if (*s == '\n') { + if (PyUnicode_READ(kind, in_str, i) == '\n') { seennl |= SEEN_CRLF; - s++; + i++; } else seennl |= SEEN_CR; } - if (s > end) + if (i >= len) break; if (seennl == SEEN_ALL) break; @@ -393,61 +393,50 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, ; } else { - PyObject *translated = NULL; - Py_UNICODE *out_str; - Py_UNICODE *in, *out, *end; - if (Py_REFCNT(output) != 1) { - /* We could try to optimize this so that we only do a copy - when there is something to translate. On the other hand, - most decoders should only output non-shared strings, i.e. - translation is done in place. */ - translated = PyUnicode_FromUnicode(NULL, len); - if (translated == NULL) - goto error; - assert(Py_REFCNT(translated) == 1); - memcpy(PyUnicode_AS_UNICODE(translated), - PyUnicode_AS_UNICODE(output), - len * sizeof(Py_UNICODE)); - } - else { - translated = output; + void *translated; + int kind = PyUnicode_KIND(output); + void *in_str = PyUnicode_DATA(output); + Py_ssize_t in, out; + /* XXX: Previous in-place translation here is disabled as + resizing is not possible anymore */ + /* We could try to optimize this so that we only do a copy + when there is something to translate. On the other hand, + we already know there is a \r byte, so chances are high + that something needs to be done. */ + translated = PyMem_Malloc(PyUnicode_KIND_SIZE(kind, len)); + if (translated == NULL) { + PyErr_NoMemory(); + goto error; } - out_str = PyUnicode_AS_UNICODE(translated); - in = in_str; - out = out_str; - end = in_str + len; + in = out = 0; for (;;) { - Py_UNICODE c; + Py_UCS4 c; /* Fast loop for non-control characters */ - while ((c = *in++) > '\r') - *out++ = c; + while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r') + PyUnicode_WRITE(kind, translated, out++, c); if (c == '\n') { - *out++ = c; + PyUnicode_WRITE(kind, translated, out++, c); seennl |= SEEN_LF; continue; } if (c == '\r') { - if (*in == '\n') { + if (PyUnicode_READ(kind, in_str, in) == '\n') { in++; seennl |= SEEN_CRLF; } else seennl |= SEEN_CR; - *out++ = '\n'; + PyUnicode_WRITE(kind, translated, out++, '\n'); continue; } - if (in > end) + if (in > len) break; - *out++ = c; - } - if (translated != output) { - Py_DECREF(output); - output = translated; - } - if (out - out_str != len) { - if (PyUnicode_Resize(&output, out - out_str) < 0) - goto error; + PyUnicode_WRITE(kind, translated, out++, c); } + Py_DECREF(output); + output = PyUnicode_FromKindAndData(kind, translated, out); + if (!output) + goto error; } self->seennl |= seennl; } @@ -705,9 +694,7 @@ typedef struct static PyObject * ascii_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors)); } static PyObject * @@ -777,17 +764,13 @@ utf32_encode(textio *self, PyObject *text) static PyObject * utf8_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors)); } static PyObject * latin1_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors)); } /* Map normalized encoding names onto the specialized encoding funcs */ @@ -1213,18 +1196,6 @@ textiowrapper_detach(textio *self) return buffer; } -Py_LOCAL_INLINE(const Py_UNICODE *) -findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch) -{ - /* like wcschr, but doesn't stop at NULL characters */ - while (size-- > 0) { - if (*s == ch) - return s; - s++; - } - return NULL; -} - /* Flush the internal write buffer. This doesn't explicitly flush the underlying buffered object, though. */ static int @@ -1269,6 +1240,9 @@ textiowrapper_write(textio *self, PyObject *args) return NULL; } + if (PyUnicode_READY(text) == -1) + return NULL; + CHECK_CLOSED(self); if (self->encoder == NULL) @@ -1276,11 +1250,10 @@ textiowrapper_write(textio *self, PyObject *args) Py_INCREF(text); - textlen = PyUnicode_GetSize(text); + textlen = PyUnicode_GET_LENGTH(text); if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) - if (findchar(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), '\n')) + if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1) haslf = 1; if (haslf && self->writetranslate && self->writenl != NULL) { @@ -1296,8 +1269,7 @@ textiowrapper_write(textio *self, PyObject *args) needflush = 1; else if (self->line_buffering && (haslf || - findchar(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), '\r'))) + PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1)) needflush = 1; /* XXX What if we were just reading? */ @@ -1369,7 +1341,8 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) if (self->decoded_chars == NULL) return PyUnicode_FromStringAndSize(NULL, 0); - avail = (PyUnicode_GET_SIZE(self->decoded_chars) + /* decoded_chars is guaranteed to be "ready". */ + avail = (PyUnicode_GET_LENGTH(self->decoded_chars) - self->decoded_chars_used); assert(avail >= 0); @@ -1378,9 +1351,9 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) n = avail; if (self->decoded_chars_used > 0 || n < avail) { - chars = PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(self->decoded_chars) - + self->decoded_chars_used, n); + chars = PyUnicode_Substring(self->decoded_chars, + self->decoded_chars_used, + self->decoded_chars_used + n); if (chars == NULL) return NULL; } @@ -1464,8 +1437,10 @@ textiowrapper_read_chunk(textio *self) /* TODO sanity check: isinstance(decoded_chars, unicode) */ if (decoded_chars == NULL) goto fail; + if (PyUnicode_READY(decoded_chars) == -1) + goto fail; textiowrapper_set_decoded_chars(self, decoded_chars); - nchars = PyUnicode_GET_SIZE(decoded_chars); + nchars = PyUnicode_GET_LENGTH(decoded_chars); if (nchars > 0) self->b2cratio = (double) nbytes / nchars; else @@ -1553,7 +1528,9 @@ textiowrapper_read(textio *self, PyObject *args) result = textiowrapper_get_decoded_chars(self, n); if (result == NULL) goto fail; - remaining -= PyUnicode_GET_SIZE(result); + if (PyUnicode_READY(result) == -1) + goto fail; + remaining -= PyUnicode_GET_LENGTH(result); /* Keep reading chunks until we have n characters to return */ while (remaining > 0) { @@ -1573,7 +1550,7 @@ textiowrapper_read(textio *self, PyObject *args) result = textiowrapper_get_decoded_chars(self, remaining); if (result == NULL) goto fail; - remaining -= PyUnicode_GET_SIZE(result); + remaining -= PyUnicode_GET_LENGTH(result); } if (chunks != NULL) { if (result != NULL && PyList_Append(chunks, result) < 0) @@ -1596,33 +1573,34 @@ textiowrapper_read(textio *self, PyObject *args) /* NOTE: `end` must point to the real end of the Py_UNICODE storage, that is to the NUL character. Otherwise the function will produce incorrect results. */ -static Py_UNICODE * -find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch) +static char * +find_control_char(int kind, char *s, char *end, Py_UCS4 ch) { - Py_UNICODE *s = start; + int size = PyUnicode_KIND_SIZE(kind, 1); for (;;) { - while (*s > ch) - s++; - if (*s == ch) + while (PyUnicode_READ(kind, s, 0) > ch) + s += size; + if (PyUnicode_READ(kind, s, 0) == ch) return s; if (s == end) return NULL; - s++; + s += size; } } Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, - Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed) + int kind, char *start, char *end, Py_ssize_t *consumed) { - Py_ssize_t len = end - start; + int size = PyUnicode_KIND_SIZE(kind, 1); + Py_ssize_t len = ((char*)end - (char*)start)/size; if (translated) { /* Newlines are already translated, only search for \n */ - Py_UNICODE *pos = find_control_char(start, end, '\n'); + char *pos = find_control_char(kind, start, end, '\n'); if (pos != NULL) - return pos - start + 1; + return (pos - start)/size + 1; else { *consumed = len; return -1; @@ -1632,63 +1610,66 @@ _PyIO_find_line_ending( /* Universal newline search. Find any of \r, \r\n, \n * The decoder ensures that \r\n are not split in two pieces */ - Py_UNICODE *s = start; + char *s = start; for (;;) { - Py_UNICODE ch; + Py_UCS4 ch; /* Fast path for non-control chars. The loop always ends since the Py_UNICODE storage is NUL-terminated. */ - while (*s > '\r') - s++; + while (PyUnicode_READ(kind, s, 0) > '\r') + s += size; if (s >= end) { *consumed = len; return -1; } - ch = *s++; + ch = PyUnicode_READ(kind, s, 0); + s += size; if (ch == '\n') - return s - start; + return (s - start)/size; if (ch == '\r') { - if (*s == '\n') - return s - start + 1; + if (PyUnicode_READ(kind, s, 0) == '\n') + return (s - start)/size + 1; else - return s - start; + return (s - start)/size; } } } else { /* Non-universal mode. */ - Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl); - Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl); + Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); + char *nl = PyUnicode_DATA(readnl); + /* Assume that readnl is an ASCII character. */ + assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); if (readnl_len == 1) { - Py_UNICODE *pos = find_control_char(start, end, nl[0]); + char *pos = find_control_char(kind, start, end, nl[0]); if (pos != NULL) - return pos - start + 1; + return (pos - start)/size + 1; *consumed = len; return -1; } else { - Py_UNICODE *s = start; - Py_UNICODE *e = end - readnl_len + 1; - Py_UNICODE *pos; + char *s = start; + char *e = end - (readnl_len - 1)*size; + char *pos; if (e < s) e = s; while (s < e) { Py_ssize_t i; - Py_UNICODE *pos = find_control_char(s, end, nl[0]); + char *pos = find_control_char(kind, s, end, nl[0]); if (pos == NULL || pos >= e) break; for (i = 1; i < readnl_len; i++) { - if (pos[i] != nl[i]) + if (PyUnicode_READ(kind, pos, i) != nl[i]) break; } if (i == readnl_len) - return pos - start + readnl_len; - s = pos + 1; + return (pos - start)/size + readnl_len; + s = pos + size; } - pos = find_control_char(e, end, nl[0]); + pos = find_control_char(kind, e, end, nl[0]); if (pos == NULL) *consumed = len; else - *consumed = pos - start; + *consumed = (pos - start)/size; return -1; } } @@ -1709,14 +1690,15 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) chunked = 0; while (1) { - Py_UNICODE *ptr; + char *ptr; Py_ssize_t line_len; + int kind; Py_ssize_t consumed = 0; /* First, get some data if necessary */ res = 1; while (!self->decoded_chars || - !PyUnicode_GET_SIZE(self->decoded_chars)) { + !PyUnicode_GET_LENGTH(self->decoded_chars)) { res = textiowrapper_read_chunk(self); if (res < 0) goto error; @@ -1741,18 +1723,24 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) assert(self->decoded_chars_used == 0); line = PyUnicode_Concat(remaining, self->decoded_chars); start = 0; - offset_to_buffer = PyUnicode_GET_SIZE(remaining); + offset_to_buffer = PyUnicode_GET_LENGTH(remaining); Py_CLEAR(remaining); if (line == NULL) goto error; + if (PyUnicode_READY(line) == -1) + goto error; } - ptr = PyUnicode_AS_UNICODE(line); - line_len = PyUnicode_GET_SIZE(line); + ptr = PyUnicode_DATA(line); + line_len = PyUnicode_GET_LENGTH(line); + kind = PyUnicode_KIND(line); endpos = _PyIO_find_line_ending( self->readtranslate, self->readuniversal, self->readnl, - ptr + start, ptr + line_len, &consumed); + kind, + ptr + PyUnicode_KIND_SIZE(kind, start), + ptr + PyUnicode_KIND_SIZE(kind, line_len), + &consumed); if (endpos >= 0) { endpos += start; if (limit >= 0 && (endpos - start) + chunked >= limit) @@ -1776,21 +1764,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) if (chunks == NULL) goto error; } - s = PyUnicode_FromUnicode(ptr + start, endpos - start); + s = PyUnicode_Substring(line, start, endpos); if (s == NULL) goto error; if (PyList_Append(chunks, s) < 0) { Py_DECREF(s); goto error; } - chunked += PyUnicode_GET_SIZE(s); + chunked += PyUnicode_GET_LENGTH(s); Py_DECREF(s); } /* There may be some remaining bytes we'll have to prepend to the next chunk of data */ if (endpos < line_len) { - remaining = PyUnicode_FromUnicode( - ptr + endpos, line_len - endpos); + remaining = PyUnicode_Substring(line, endpos, line_len); if (remaining == NULL) goto error; } @@ -1802,19 +1789,12 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) if (line != NULL) { /* Our line ends in the current buffer */ self->decoded_chars_used = endpos - offset_to_buffer; - if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) { - if (start == 0 && Py_REFCNT(line) == 1) { - if (PyUnicode_Resize(&line, endpos) < 0) - goto error; - } - else { - PyObject *s = PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(line) + start, endpos - start); - Py_CLEAR(line); - if (s == NULL) - goto error; - line = s; - } + if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) { + PyObject *s = PyUnicode_Substring(line, start, endpos); + Py_CLEAR(line); + if (s == NULL) + goto error; + line = s; } } if (remaining != NULL) { @@ -1828,16 +1808,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) Py_CLEAR(remaining); } if (chunks != NULL) { - if (line != NULL && PyList_Append(chunks, line) < 0) - goto error; - Py_CLEAR(line); + if (line != NULL) { + if (PyList_Append(chunks, line) < 0) + goto error; + Py_DECREF(line); + } line = PyUnicode_Join(_PyIO_empty_str, chunks); if (line == NULL) goto error; - Py_DECREF(chunks); + Py_CLEAR(chunks); + } + if (line == NULL) { + Py_INCREF(_PyIO_empty_str); + line = _PyIO_empty_str; } - if (line == NULL) - line = PyUnicode_FromStringAndSize(NULL, 0); return line; @@ -2128,6 +2112,10 @@ textiowrapper_seek(textio *self, PyObject *args) if (decoded == NULL) goto fail; + if (PyUnicode_READY(decoded) == -1) { + Py_DECREF(decoded); + goto fail; + } textiowrapper_set_decoded_chars(self, decoded); @@ -2250,7 +2238,7 @@ textiowrapper_tell(textio *self, PyObject *args) if (_decoded == NULL) \ goto fail; \ assert (PyUnicode_Check(_decoded)); \ - res = PyUnicode_GET_SIZE(_decoded); \ + res = PyUnicode_GET_LENGTH(_decoded); \ Py_DECREF(_decoded); \ } while (0) @@ -2333,7 +2321,7 @@ textiowrapper_tell(textio *self, PyObject *args) if (decoded == NULL) goto fail; assert (PyUnicode_Check(decoded)); - chars_decoded += PyUnicode_GET_SIZE(decoded); + chars_decoded += PyUnicode_GET_LENGTH(decoded); Py_DECREF(decoded); cookie.need_eof = 1; @@ -2559,10 +2547,10 @@ textiowrapper_iternext(textio *self) } } - if (line == NULL) + if (line == NULL || PyUnicode_READY(line) == -1) return NULL; - if (PyUnicode_GET_SIZE(line) == 0) { + if (PyUnicode_GET_LENGTH(line) == 0) { /* Reached EOF or would have blocked */ Py_DECREF(line); Py_CLEAR(self->snapshot); |