summaryrefslogtreecommitdiffstats
path: root/Modules/_io
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2011-09-28 05:41:54 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2011-09-28 05:41:54 (GMT)
commitd63a3b8beb4a0841cb59fb3515347ccaab34b733 (patch)
tree3b4e3cc63151c5a5a910c3550a190aefaea96ad4 /Modules/_io
parent48d49497c50e79d14e9df9527d766ca3a0a38be5 (diff)
downloadcpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.zip
cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.gz
cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.bz2
Implement PEP 393.
Diffstat (limited to 'Modules/_io')
-rw-r--r--Modules/_io/_iomodule.h2
-rw-r--r--Modules/_io/stringio.c69
-rw-r--r--Modules/_io/textio.c352
3 files changed, 213 insertions, 210 deletions
diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h
index 9174bdd..4e97dd1 100644
--- a/Modules/_io/_iomodule.h
+++ b/Modules/_io/_iomodule.h
@@ -55,7 +55,7 @@ extern PyObject *_PyIncrementalNewlineDecoder_decode(
Otherwise, the function will scan further and return garbage. */
extern Py_ssize_t _PyIO_find_line_ending(
int translated, int universal, PyObject *readnl,
- Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed);
+ int kind, char *start, char *end, Py_ssize_t *consumed);
#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */
diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c
index c9d14b1..c40163f 100644
--- a/Modules/_io/stringio.c
+++ b/Modules/_io/stringio.c
@@ -9,7 +9,7 @@
typedef struct {
PyObject_HEAD
- Py_UNICODE *buf;
+ Py_UCS4 *buf;
Py_ssize_t pos;
Py_ssize_t string_size;
size_t buf_size;
@@ -21,7 +21,7 @@ typedef struct {
PyObject *decoder;
PyObject *readnl;
PyObject *writenl;
-
+
PyObject *dict;
PyObject *weakreflist;
} stringio;
@@ -56,7 +56,7 @@ resize_buffer(stringio *self, size_t size)
/* Here, unsigned types are used to avoid dealing with signed integer
overflow, which is undefined in C. */
size_t alloc = self->buf_size;
- Py_UNICODE *new_buf = NULL;
+ Py_UCS4 *new_buf = NULL;
assert(self->buf != NULL);
@@ -84,10 +84,9 @@ resize_buffer(stringio *self, size_t size)
alloc = size + 1;
}
- if (alloc > ((size_t)-1) / sizeof(Py_UNICODE))
+ if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
goto overflow;
- new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf,
- alloc * sizeof(Py_UNICODE));
+ new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
if (new_buf == NULL) {
PyErr_NoMemory();
return -1;
@@ -108,9 +107,9 @@ resize_buffer(stringio *self, size_t size)
static Py_ssize_t
write_str(stringio *self, PyObject *obj)
{
- Py_UNICODE *str;
Py_ssize_t len;
PyObject *decoded = NULL;
+
assert(self->buf != NULL);
assert(self->pos >= 0);
@@ -132,8 +131,7 @@ write_str(stringio *self, PyObject *obj)
return -1;
assert(PyUnicode_Check(decoded));
- str = PyUnicode_AS_UNICODE(decoded);
- len = PyUnicode_GET_SIZE(decoded);
+ len = PyUnicode_GET_LENGTH(decoded);
assert(len >= 0);
@@ -161,18 +159,21 @@ write_str(stringio *self, PyObject *obj)
*/
memset(self->buf + self->string_size, '\0',
- (self->pos - self->string_size) * sizeof(Py_UNICODE));
+ (self->pos - self->string_size) * sizeof(Py_UCS4));
}
/* Copy the data to the internal buffer, overwriting some of the
existing data if self->pos < self->string_size. */
- memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
- self->pos += len;
+ if (!PyUnicode_AsUCS4(decoded,
+ self->buf + self->pos,
+ self->buf_size - self->pos,
+ 0))
+ goto fail;
/* Set the new length of the internal string if it has changed. */
- if (self->string_size < self->pos) {
+ self->pos += len;
+ if (self->string_size < self->pos)
self->string_size = self->pos;
- }
Py_DECREF(decoded);
return 0;
@@ -190,7 +191,8 @@ stringio_getvalue(stringio *self)
{
CHECK_INITIALIZED(self);
CHECK_CLOSED(self);
- return PyUnicode_FromUnicode(self->buf, self->string_size);
+ return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
+ self->string_size);
}
PyDoc_STRVAR(stringio_tell_doc,
@@ -214,7 +216,7 @@ static PyObject *
stringio_read(stringio *self, PyObject *args)
{
Py_ssize_t size, n;
- Py_UNICODE *output;
+ Py_UCS4 *output;
PyObject *arg = Py_None;
CHECK_INITIALIZED(self);
@@ -247,19 +249,19 @@ stringio_read(stringio *self, PyObject *args)
output = self->buf + self->pos;
self->pos += size;
- return PyUnicode_FromUnicode(output, size);
+ return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
}
/* Internal helper, used by stringio_readline and stringio_iternext */
static PyObject *
_stringio_readline(stringio *self, Py_ssize_t limit)
{
- Py_UNICODE *start, *end, old_char;
+ Py_UCS4 *start, *end, old_char;
Py_ssize_t len, consumed;
/* In case of overseek, return the empty string */
if (self->pos >= self->string_size)
- return PyUnicode_FromString("");
+ return PyUnicode_New(0, 0);
start = self->buf + self->pos;
if (limit < 0 || limit > self->string_size - self->pos)
@@ -270,14 +272,14 @@ _stringio_readline(stringio *self, Py_ssize_t limit)
*end = '\0';
len = _PyIO_find_line_ending(
self->readtranslate, self->readuniversal, self->readnl,
- start, end, &consumed);
+ PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
*end = old_char;
/* If we haven't found any line ending, we just return everything
(`consumed` is ignored). */
if (len < 0)
len = limit;
self->pos += len;
- return PyUnicode_FromUnicode(start, len);
+ return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
}
PyDoc_STRVAR(stringio_readline_doc,
@@ -462,8 +464,10 @@ stringio_write(stringio *self, PyObject *obj)
Py_TYPE(obj)->tp_name);
return NULL;
}
+ if (PyUnicode_READY(obj))
+ return NULL;
CHECK_CLOSED(self);
- size = PyUnicode_GET_SIZE(obj);
+ size = PyUnicode_GET_LENGTH(obj);
if (size > 0 && write_str(self, obj) < 0)
return NULL;
@@ -535,7 +539,7 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
/* tp_alloc initializes all the fields to zero. So we don't have to
initialize them here. */
- self->buf = (Py_UNICODE *)PyMem_Malloc(0);
+ self->buf = (Py_UCS4 *)PyMem_Malloc(0);
if (self->buf == NULL) {
Py_DECREF(self);
return PyErr_NoMemory();
@@ -747,11 +751,22 @@ stringio_setstate(stringio *self, PyObject *state)
once by __init__. So we do not take any chance and replace object's
buffer completely. */
{
- Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0));
- Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0));
- if (resize_buffer(self, bufsize) < 0)
+ PyObject *item;
+ Py_UCS4 *buf;
+ Py_ssize_t bufsize;
+
+ item = PyTuple_GET_ITEM(state, 0);
+ buf = PyUnicode_AsUCS4Copy(item);
+ if (buf == NULL)
return NULL;
- memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE));
+ bufsize = PyUnicode_GET_LENGTH(item);
+
+ if (resize_buffer(self, bufsize) < 0) {
+ PyMem_Free(buf);
+ return NULL;
+ }
+ memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
+ PyMem_Free(buf);
self->string_size = bufsize;
}
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 13d4bd9..9c06ec8 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -274,18 +274,28 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
goto error;
}
- output_len = PyUnicode_GET_SIZE(output);
+ if (PyUnicode_READY(output) == -1)
+ goto error;
+
+ output_len = PyUnicode_GET_LENGTH(output);
if (self->pendingcr && (final || output_len > 0)) {
- Py_UNICODE *out;
- PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
+ /* Prefix output with CR */
+ int kind;
+ PyObject *modified;
+ char *out;
+
+ modified = PyUnicode_New(output_len + 1,
+ PyUnicode_MAX_CHAR_VALUE(output));
if (modified == NULL)
goto error;
- out = PyUnicode_AS_UNICODE(modified);
- out[0] = '\r';
- memcpy(out + 1, PyUnicode_AS_UNICODE(output),
- output_len * sizeof(Py_UNICODE));
+ kind = PyUnicode_KIND(modified);
+ out = PyUnicode_DATA(modified);
+ PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
+ memcpy(out + PyUnicode_KIND_SIZE(kind, 1),
+ PyUnicode_DATA(output),
+ PyUnicode_KIND_SIZE(kind, output_len));
Py_DECREF(output);
- output = modified;
+ output = modified; /* output remains ready */
self->pendingcr = 0;
output_len++;
}
@@ -295,21 +305,13 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
*/
if (!final) {
if (output_len > 0
- && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
-
- if (Py_REFCNT(output) == 1) {
- if (PyUnicode_Resize(&output, output_len - 1) < 0)
- goto error;
- }
- else {
- PyObject *modified = PyUnicode_FromUnicode(
- PyUnicode_AS_UNICODE(output),
- output_len - 1);
- if (modified == NULL)
- goto error;
- Py_DECREF(output);
- output = modified;
- }
+ && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
+ {
+ PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
+ if (modified == NULL)
+ goto error;
+ Py_DECREF(output);
+ output = modified;
self->pendingcr = 1;
}
}
@@ -317,13 +319,15 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
/* Record which newlines are read and do newline translation if desired,
all in one pass. */
{
- Py_UNICODE *in_str;
+ void *in_str;
Py_ssize_t len;
int seennl = self->seennl;
int only_lf = 0;
+ int kind;
- in_str = PyUnicode_AS_UNICODE(output);
- len = PyUnicode_GET_SIZE(output);
+ in_str = PyUnicode_DATA(output);
+ len = PyUnicode_GET_LENGTH(output);
+ kind = PyUnicode_KIND(output);
if (len == 0)
return output;
@@ -332,7 +336,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
for the \r *byte* with the libc's optimized memchr.
*/
if (seennl == SEEN_LF || seennl == 0) {
- only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
+ only_lf = (memchr(in_str, '\r', PyUnicode_KIND_SIZE(kind, len)) == NULL);
}
if (only_lf) {
@@ -340,21 +344,19 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
(there's nothing else to be done, even when in translation mode)
*/
if (seennl == 0 &&
- memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
- Py_UNICODE *s, *end;
- s = in_str;
- end = in_str + len;
+ memchr(in_str, '\n', PyUnicode_KIND_SIZE(kind, len)) != NULL) {
+ Py_ssize_t i = 0;
for (;;) {
Py_UNICODE c;
/* Fast loop for non-control characters */
- while (*s > '\n')
- s++;
- c = *s++;
+ while (PyUnicode_READ(kind, in_str, i) > '\n')
+ i++;
+ c = PyUnicode_READ(kind, in_str, i++);
if (c == '\n') {
seennl |= SEEN_LF;
break;
}
- if (s > end)
+ if (i >= len)
break;
}
}
@@ -362,29 +364,27 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
need translating */
}
else if (!self->translate) {
- Py_UNICODE *s, *end;
+ Py_ssize_t i = 0;
/* We have already seen all newline types, no need to scan again */
if (seennl == SEEN_ALL)
goto endscan;
- s = in_str;
- end = in_str + len;
for (;;) {
- Py_UNICODE c;
+ Py_UCS4 c;
/* Fast loop for non-control characters */
- while (*s > '\r')
- s++;
- c = *s++;
+ while (PyUnicode_READ(kind, in_str, i) > '\r')
+ i++;
+ c = PyUnicode_READ(kind, in_str, i++);
if (c == '\n')
seennl |= SEEN_LF;
else if (c == '\r') {
- if (*s == '\n') {
+ if (PyUnicode_READ(kind, in_str, i) == '\n') {
seennl |= SEEN_CRLF;
- s++;
+ i++;
}
else
seennl |= SEEN_CR;
}
- if (s > end)
+ if (i >= len)
break;
if (seennl == SEEN_ALL)
break;
@@ -393,61 +393,50 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
;
}
else {
- PyObject *translated = NULL;
- Py_UNICODE *out_str;
- Py_UNICODE *in, *out, *end;
- if (Py_REFCNT(output) != 1) {
- /* We could try to optimize this so that we only do a copy
- when there is something to translate. On the other hand,
- most decoders should only output non-shared strings, i.e.
- translation is done in place. */
- translated = PyUnicode_FromUnicode(NULL, len);
- if (translated == NULL)
- goto error;
- assert(Py_REFCNT(translated) == 1);
- memcpy(PyUnicode_AS_UNICODE(translated),
- PyUnicode_AS_UNICODE(output),
- len * sizeof(Py_UNICODE));
- }
- else {
- translated = output;
+ void *translated;
+ int kind = PyUnicode_KIND(output);
+ void *in_str = PyUnicode_DATA(output);
+ Py_ssize_t in, out;
+ /* XXX: Previous in-place translation here is disabled as
+ resizing is not possible anymore */
+ /* We could try to optimize this so that we only do a copy
+ when there is something to translate. On the other hand,
+ we already know there is a \r byte, so chances are high
+ that something needs to be done. */
+ translated = PyMem_Malloc(PyUnicode_KIND_SIZE(kind, len));
+ if (translated == NULL) {
+ PyErr_NoMemory();
+ goto error;
}
- out_str = PyUnicode_AS_UNICODE(translated);
- in = in_str;
- out = out_str;
- end = in_str + len;
+ in = out = 0;
for (;;) {
- Py_UNICODE c;
+ Py_UCS4 c;
/* Fast loop for non-control characters */
- while ((c = *in++) > '\r')
- *out++ = c;
+ while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
+ PyUnicode_WRITE(kind, translated, out++, c);
if (c == '\n') {
- *out++ = c;
+ PyUnicode_WRITE(kind, translated, out++, c);
seennl |= SEEN_LF;
continue;
}
if (c == '\r') {
- if (*in == '\n') {
+ if (PyUnicode_READ(kind, in_str, in) == '\n') {
in++;
seennl |= SEEN_CRLF;
}
else
seennl |= SEEN_CR;
- *out++ = '\n';
+ PyUnicode_WRITE(kind, translated, out++, '\n');
continue;
}
- if (in > end)
+ if (in > len)
break;
- *out++ = c;
- }
- if (translated != output) {
- Py_DECREF(output);
- output = translated;
- }
- if (out - out_str != len) {
- if (PyUnicode_Resize(&output, out - out_str) < 0)
- goto error;
+ PyUnicode_WRITE(kind, translated, out++, c);
}
+ Py_DECREF(output);
+ output = PyUnicode_FromKindAndData(kind, translated, out);
+ if (!output)
+ goto error;
}
self->seennl |= seennl;
}
@@ -705,9 +694,7 @@ typedef struct
static PyObject *
ascii_encode(textio *self, PyObject *text)
{
- return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
- PyUnicode_GET_SIZE(text),
- PyBytes_AS_STRING(self->errors));
+ return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
}
static PyObject *
@@ -777,17 +764,13 @@ utf32_encode(textio *self, PyObject *text)
static PyObject *
utf8_encode(textio *self, PyObject *text)
{
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
- PyUnicode_GET_SIZE(text),
- PyBytes_AS_STRING(self->errors));
+ return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
}
static PyObject *
latin1_encode(textio *self, PyObject *text)
{
- return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
- PyUnicode_GET_SIZE(text),
- PyBytes_AS_STRING(self->errors));
+ return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
}
/* Map normalized encoding names onto the specialized encoding funcs */
@@ -1213,18 +1196,6 @@ textiowrapper_detach(textio *self)
return buffer;
}
-Py_LOCAL_INLINE(const Py_UNICODE *)
-findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
-{
- /* like wcschr, but doesn't stop at NULL characters */
- while (size-- > 0) {
- if (*s == ch)
- return s;
- s++;
- }
- return NULL;
-}
-
/* Flush the internal write buffer. This doesn't explicitly flush the
underlying buffered object, though. */
static int
@@ -1269,6 +1240,9 @@ textiowrapper_write(textio *self, PyObject *args)
return NULL;
}
+ if (PyUnicode_READY(text) == -1)
+ return NULL;
+
CHECK_CLOSED(self);
if (self->encoder == NULL)
@@ -1276,11 +1250,10 @@ textiowrapper_write(textio *self, PyObject *args)
Py_INCREF(text);
- textlen = PyUnicode_GetSize(text);
+ textlen = PyUnicode_GET_LENGTH(text);
if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
- if (findchar(PyUnicode_AS_UNICODE(text),
- PyUnicode_GET_SIZE(text), '\n'))
+ if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
haslf = 1;
if (haslf && self->writetranslate && self->writenl != NULL) {
@@ -1296,8 +1269,7 @@ textiowrapper_write(textio *self, PyObject *args)
needflush = 1;
else if (self->line_buffering &&
(haslf ||
- findchar(PyUnicode_AS_UNICODE(text),
- PyUnicode_GET_SIZE(text), '\r')))
+ PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
needflush = 1;
/* XXX What if we were just reading? */
@@ -1369,7 +1341,8 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
if (self->decoded_chars == NULL)
return PyUnicode_FromStringAndSize(NULL, 0);
- avail = (PyUnicode_GET_SIZE(self->decoded_chars)
+ /* decoded_chars is guaranteed to be "ready". */
+ avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
- self->decoded_chars_used);
assert(avail >= 0);
@@ -1378,9 +1351,9 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
n = avail;
if (self->decoded_chars_used > 0 || n < avail) {
- chars = PyUnicode_FromUnicode(
- PyUnicode_AS_UNICODE(self->decoded_chars)
- + self->decoded_chars_used, n);
+ chars = PyUnicode_Substring(self->decoded_chars,
+ self->decoded_chars_used,
+ self->decoded_chars_used + n);
if (chars == NULL)
return NULL;
}
@@ -1464,8 +1437,10 @@ textiowrapper_read_chunk(textio *self)
/* TODO sanity check: isinstance(decoded_chars, unicode) */
if (decoded_chars == NULL)
goto fail;
+ if (PyUnicode_READY(decoded_chars) == -1)
+ goto fail;
textiowrapper_set_decoded_chars(self, decoded_chars);
- nchars = PyUnicode_GET_SIZE(decoded_chars);
+ nchars = PyUnicode_GET_LENGTH(decoded_chars);
if (nchars > 0)
self->b2cratio = (double) nbytes / nchars;
else
@@ -1553,7 +1528,9 @@ textiowrapper_read(textio *self, PyObject *args)
result = textiowrapper_get_decoded_chars(self, n);
if (result == NULL)
goto fail;
- remaining -= PyUnicode_GET_SIZE(result);
+ if (PyUnicode_READY(result) == -1)
+ goto fail;
+ remaining -= PyUnicode_GET_LENGTH(result);
/* Keep reading chunks until we have n characters to return */
while (remaining > 0) {
@@ -1573,7 +1550,7 @@ textiowrapper_read(textio *self, PyObject *args)
result = textiowrapper_get_decoded_chars(self, remaining);
if (result == NULL)
goto fail;
- remaining -= PyUnicode_GET_SIZE(result);
+ remaining -= PyUnicode_GET_LENGTH(result);
}
if (chunks != NULL) {
if (result != NULL && PyList_Append(chunks, result) < 0)
@@ -1596,33 +1573,34 @@ textiowrapper_read(textio *self, PyObject *args)
/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
that is to the NUL character. Otherwise the function will produce
incorrect results. */
-static Py_UNICODE *
-find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
+static char *
+find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
{
- Py_UNICODE *s = start;
+ int size = PyUnicode_KIND_SIZE(kind, 1);
for (;;) {
- while (*s > ch)
- s++;
- if (*s == ch)
+ while (PyUnicode_READ(kind, s, 0) > ch)
+ s += size;
+ if (PyUnicode_READ(kind, s, 0) == ch)
return s;
if (s == end)
return NULL;
- s++;
+ s += size;
}
}
Py_ssize_t
_PyIO_find_line_ending(
int translated, int universal, PyObject *readnl,
- Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
+ int kind, char *start, char *end, Py_ssize_t *consumed)
{
- Py_ssize_t len = end - start;
+ int size = PyUnicode_KIND_SIZE(kind, 1);
+ Py_ssize_t len = ((char*)end - (char*)start)/size;
if (translated) {
/* Newlines are already translated, only search for \n */
- Py_UNICODE *pos = find_control_char(start, end, '\n');
+ char *pos = find_control_char(kind, start, end, '\n');
if (pos != NULL)
- return pos - start + 1;
+ return (pos - start)/size + 1;
else {
*consumed = len;
return -1;
@@ -1632,63 +1610,66 @@ _PyIO_find_line_ending(
/* Universal newline search. Find any of \r, \r\n, \n
* The decoder ensures that \r\n are not split in two pieces
*/
- Py_UNICODE *s = start;
+ char *s = start;
for (;;) {
- Py_UNICODE ch;
+ Py_UCS4 ch;
/* Fast path for non-control chars. The loop always ends
since the Py_UNICODE storage is NUL-terminated. */
- while (*s > '\r')
- s++;
+ while (PyUnicode_READ(kind, s, 0) > '\r')
+ s += size;
if (s >= end) {
*consumed = len;
return -1;
}
- ch = *s++;
+ ch = PyUnicode_READ(kind, s, 0);
+ s += size;
if (ch == '\n')
- return s - start;
+ return (s - start)/size;
if (ch == '\r') {
- if (*s == '\n')
- return s - start + 1;
+ if (PyUnicode_READ(kind, s, 0) == '\n')
+ return (s - start)/size + 1;
else
- return s - start;
+ return (s - start)/size;
}
}
}
else {
/* Non-universal mode. */
- Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
- Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
+ Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
+ char *nl = PyUnicode_DATA(readnl);
+ /* Assume that readnl is an ASCII character. */
+ assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
if (readnl_len == 1) {
- Py_UNICODE *pos = find_control_char(start, end, nl[0]);
+ char *pos = find_control_char(kind, start, end, nl[0]);
if (pos != NULL)
- return pos - start + 1;
+ return (pos - start)/size + 1;
*consumed = len;
return -1;
}
else {
- Py_UNICODE *s = start;
- Py_UNICODE *e = end - readnl_len + 1;
- Py_UNICODE *pos;
+ char *s = start;
+ char *e = end - (readnl_len - 1)*size;
+ char *pos;
if (e < s)
e = s;
while (s < e) {
Py_ssize_t i;
- Py_UNICODE *pos = find_control_char(s, end, nl[0]);
+ char *pos = find_control_char(kind, s, end, nl[0]);
if (pos == NULL || pos >= e)
break;
for (i = 1; i < readnl_len; i++) {
- if (pos[i] != nl[i])
+ if (PyUnicode_READ(kind, pos, i) != nl[i])
break;
}
if (i == readnl_len)
- return pos - start + readnl_len;
- s = pos + 1;
+ return (pos - start)/size + readnl_len;
+ s = pos + size;
}
- pos = find_control_char(e, end, nl[0]);
+ pos = find_control_char(kind, e, end, nl[0]);
if (pos == NULL)
*consumed = len;
else
- *consumed = pos - start;
+ *consumed = (pos - start)/size;
return -1;
}
}
@@ -1709,14 +1690,15 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
chunked = 0;
while (1) {
- Py_UNICODE *ptr;
+ char *ptr;
Py_ssize_t line_len;
+ int kind;
Py_ssize_t consumed = 0;
/* First, get some data if necessary */
res = 1;
while (!self->decoded_chars ||
- !PyUnicode_GET_SIZE(self->decoded_chars)) {
+ !PyUnicode_GET_LENGTH(self->decoded_chars)) {
res = textiowrapper_read_chunk(self);
if (res < 0)
goto error;
@@ -1741,18 +1723,24 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
assert(self->decoded_chars_used == 0);
line = PyUnicode_Concat(remaining, self->decoded_chars);
start = 0;
- offset_to_buffer = PyUnicode_GET_SIZE(remaining);
+ offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Py_CLEAR(remaining);
if (line == NULL)
goto error;
+ if (PyUnicode_READY(line) == -1)
+ goto error;
}
- ptr = PyUnicode_AS_UNICODE(line);
- line_len = PyUnicode_GET_SIZE(line);
+ ptr = PyUnicode_DATA(line);
+ line_len = PyUnicode_GET_LENGTH(line);
+ kind = PyUnicode_KIND(line);
endpos = _PyIO_find_line_ending(
self->readtranslate, self->readuniversal, self->readnl,
- ptr + start, ptr + line_len, &consumed);
+ kind,
+ ptr + PyUnicode_KIND_SIZE(kind, start),
+ ptr + PyUnicode_KIND_SIZE(kind, line_len),
+ &consumed);
if (endpos >= 0) {
endpos += start;
if (limit >= 0 && (endpos - start) + chunked >= limit)
@@ -1776,21 +1764,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
if (chunks == NULL)
goto error;
}
- s = PyUnicode_FromUnicode(ptr + start, endpos - start);
+ s = PyUnicode_Substring(line, start, endpos);
if (s == NULL)
goto error;
if (PyList_Append(chunks, s) < 0) {
Py_DECREF(s);
goto error;
}
- chunked += PyUnicode_GET_SIZE(s);
+ chunked += PyUnicode_GET_LENGTH(s);
Py_DECREF(s);
}
/* There may be some remaining bytes we'll have to prepend to the
next chunk of data */
if (endpos < line_len) {
- remaining = PyUnicode_FromUnicode(
- ptr + endpos, line_len - endpos);
+ remaining = PyUnicode_Substring(line, endpos, line_len);
if (remaining == NULL)
goto error;
}
@@ -1802,19 +1789,12 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
if (line != NULL) {
/* Our line ends in the current buffer */
self->decoded_chars_used = endpos - offset_to_buffer;
- if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
- if (start == 0 && Py_REFCNT(line) == 1) {
- if (PyUnicode_Resize(&line, endpos) < 0)
- goto error;
- }
- else {
- PyObject *s = PyUnicode_FromUnicode(
- PyUnicode_AS_UNICODE(line) + start, endpos - start);
- Py_CLEAR(line);
- if (s == NULL)
- goto error;
- line = s;
- }
+ if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
+ PyObject *s = PyUnicode_Substring(line, start, endpos);
+ Py_CLEAR(line);
+ if (s == NULL)
+ goto error;
+ line = s;
}
}
if (remaining != NULL) {
@@ -1828,16 +1808,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
Py_CLEAR(remaining);
}
if (chunks != NULL) {
- if (line != NULL && PyList_Append(chunks, line) < 0)
- goto error;
- Py_CLEAR(line);
+ if (line != NULL) {
+ if (PyList_Append(chunks, line) < 0)
+ goto error;
+ Py_DECREF(line);
+ }
line = PyUnicode_Join(_PyIO_empty_str, chunks);
if (line == NULL)
goto error;
- Py_DECREF(chunks);
+ Py_CLEAR(chunks);
+ }
+ if (line == NULL) {
+ Py_INCREF(_PyIO_empty_str);
+ line = _PyIO_empty_str;
}
- if (line == NULL)
- line = PyUnicode_FromStringAndSize(NULL, 0);
return line;
@@ -2128,6 +2112,10 @@ textiowrapper_seek(textio *self, PyObject *args)
if (decoded == NULL)
goto fail;
+ if (PyUnicode_READY(decoded) == -1) {
+ Py_DECREF(decoded);
+ goto fail;
+ }
textiowrapper_set_decoded_chars(self, decoded);
@@ -2250,7 +2238,7 @@ textiowrapper_tell(textio *self, PyObject *args)
if (_decoded == NULL) \
goto fail; \
assert (PyUnicode_Check(_decoded)); \
- res = PyUnicode_GET_SIZE(_decoded); \
+ res = PyUnicode_GET_LENGTH(_decoded); \
Py_DECREF(_decoded); \
} while (0)
@@ -2333,7 +2321,7 @@ textiowrapper_tell(textio *self, PyObject *args)
if (decoded == NULL)
goto fail;
assert (PyUnicode_Check(decoded));
- chars_decoded += PyUnicode_GET_SIZE(decoded);
+ chars_decoded += PyUnicode_GET_LENGTH(decoded);
Py_DECREF(decoded);
cookie.need_eof = 1;
@@ -2559,10 +2547,10 @@ textiowrapper_iternext(textio *self)
}
}
- if (line == NULL)
+ if (line == NULL || PyUnicode_READY(line) == -1)
return NULL;
- if (PyUnicode_GET_SIZE(line) == 0) {
+ if (PyUnicode_GET_LENGTH(line) == 0) {
/* Reached EOF or would have blocked */
Py_DECREF(line);
Py_CLEAR(self->snapshot);