summaryrefslogtreecommitdiffstats
path: root/Modules/_stringio.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_stringio.c')
-rw-r--r--Modules/_stringio.c538
1 files changed, 462 insertions, 76 deletions
diff --git a/Modules/_stringio.c b/Modules/_stringio.c
index e627258..a88fcb7 100644
--- a/Modules/_stringio.c
+++ b/Modules/_stringio.c
@@ -1,8 +1,11 @@
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
+#include "structmember.h"
+#include "_iomodule.h"
-/* This module is a stripped down version of _bytesio.c with a Py_UNICODE
- buffer. Most of the functionality is provided by subclassing _StringIO. */
-
+/* Implementation note: the buffer is always at least one character longer
+ than the enclosed string, for proper functioning of _PyIO_find_line_ending.
+*/
typedef struct {
PyObject_HEAD
@@ -10,8 +13,39 @@ typedef struct {
Py_ssize_t pos;
Py_ssize_t string_size;
size_t buf_size;
+
+ char ok; /* initialized? */
+ char closed;
+ char readuniversal;
+ char readtranslate;
+ PyObject *decoder;
+ PyObject *readnl;
+ PyObject *writenl;
+
+ PyObject *dict;
+ PyObject *weakreflist;
} StringIOObject;
+#define CHECK_INITIALIZED(self) \
+ if (self->ok <= 0) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on uninitialized object"); \
+ return NULL; \
+ }
+
+#define CHECK_CLOSED(self) \
+ if (self->closed) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on closed file"); \
+ return NULL; \
+ }
+
+PyDoc_STRVAR(stringio_doc,
+ "Text I/O implementation using an in-memory buffer.\n"
+ "\n"
+ "The initial_value argument sets the value of object. The newline\n"
+ "argument is like the one of TextIOWrapper's constructor.");
+
/* Internal routine for changing the size, in terms of characters, of the
buffer of StringIO objects. The caller should ensure that the 'size'
@@ -26,6 +60,8 @@ resize_buffer(StringIOObject *self, size_t size)
assert(self->buf != NULL);
+ /* Reserve one more char for line ending detection. */
+ size = size + 1;
/* For simplicity, stay in the range of the signed type. Anyway, Python
doesn't allow strings to be longer than this. */
if (size > PY_SSIZE_T_MAX)
@@ -67,13 +103,38 @@ resize_buffer(StringIOObject *self, size_t size)
return -1;
}
-/* Internal routine for writing a string of characters to the buffer of a
- StringIO object. Returns the number of bytes wrote, or -1 on error. */
+/* Internal routine for writing a whole PyUnicode object to the buffer of a
+ StringIO object. Returns 0 on success, or -1 on error. */
static Py_ssize_t
-write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
+write_str(StringIOObject *self, PyObject *obj)
{
+ Py_UNICODE *str;
+ Py_ssize_t len;
+ PyObject *decoded = NULL;
assert(self->buf != NULL);
assert(self->pos >= 0);
+
+ if (self->decoder != NULL) {
+ decoded = _PyIncrementalNewlineDecoder_decode(
+ self->decoder, obj, 1 /* always final */);
+ }
+ else {
+ decoded = obj;
+ Py_INCREF(decoded);
+ }
+ if (self->writenl) {
+ PyObject *translated = PyUnicode_Replace(
+ decoded, _PyIO_str_nl, self->writenl, -1);
+ Py_DECREF(decoded);
+ decoded = translated;
+ }
+ if (decoded == NULL)
+ return -1;
+
+ assert(PyUnicode_Check(decoded));
+ str = PyUnicode_AS_UNICODE(decoded);
+ len = PyUnicode_GET_SIZE(decoded);
+
assert(len >= 0);
/* This overflow check is not strictly necessary. However, it avoids us to
@@ -82,11 +143,11 @@ write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
if (self->pos > PY_SSIZE_T_MAX - len) {
PyErr_SetString(PyExc_OverflowError,
"new position too large");
- return -1;
+ goto fail;
}
if (self->pos + len > self->string_size) {
if (resize_buffer(self, self->pos + len) < 0)
- return -1;
+ goto fail;
}
if (self->pos > self->string_size) {
@@ -108,26 +169,47 @@ write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
self->pos += len;
- /* Set the new length of the internal string if it has changed */
+ /* Set the new length of the internal string if it has changed. */
if (self->string_size < self->pos) {
self->string_size = self->pos;
}
- return len;
+ Py_DECREF(decoded);
+ return 0;
+
+fail:
+ Py_XDECREF(decoded);
+ return -1;
}
+PyDoc_STRVAR(stringio_getvalue_doc,
+ "Retrieve the entire contents of the object.");
+
static PyObject *
stringio_getvalue(StringIOObject *self)
{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyUnicode_FromUnicode(self->buf, self->string_size);
}
+PyDoc_STRVAR(stringio_tell_doc,
+ "Tell the current file position.");
+
static PyObject *
stringio_tell(StringIOObject *self)
{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyLong_FromSsize_t(self->pos);
}
+PyDoc_STRVAR(stringio_read_doc,
+ "Read at most n characters, returned as a string.\n"
+ "\n"
+ "If the argument is negative or omitted, read until EOF\n"
+ "is reached. Return an empty string at EOF.\n");
+
static PyObject *
stringio_read(StringIOObject *self, PyObject *args)
{
@@ -135,11 +217,13 @@ stringio_read(StringIOObject *self, PyObject *args)
Py_UNICODE *output;
PyObject *arg = Py_None;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|O:read", &arg))
return NULL;
+ CHECK_CLOSED(self);
- if (PyLong_Check(arg)) {
- size = PyLong_AsSsize_t(arg);
+ if (PyNumber_Check(arg)) {
+ size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
if (size == -1 && PyErr_Occurred())
return NULL;
}
@@ -161,24 +245,127 @@ stringio_read(StringIOObject *self, PyObject *args)
size = 0;
}
- assert(self->buf != NULL);
output = self->buf + self->pos;
self->pos += size;
-
return PyUnicode_FromUnicode(output, size);
}
+/* Internal helper, used by stringio_readline and stringio_iternext */
+static PyObject *
+_stringio_readline(StringIOObject *self, Py_ssize_t limit)
+{
+ Py_UNICODE *start, *end, old_char;
+ Py_ssize_t len, consumed;
+
+ /* In case of overseek, return the empty string */
+ if (self->pos >= self->string_size)
+ return PyUnicode_FromString("");
+
+ start = self->buf + self->pos;
+ if (limit < 0 || limit > self->string_size - self->pos)
+ limit = self->string_size - self->pos;
+
+ end = start + limit;
+ old_char = *end;
+ *end = '\0';
+ len = _PyIO_find_line_ending(
+ self->readtranslate, self->readuniversal, self->readnl,
+ start, end, &consumed);
+ *end = old_char;
+ /* If we haven't found any line ending, we just return everything
+ (`consumed` is ignored). */
+ if (len < 0)
+ len = limit;
+ self->pos += len;
+ return PyUnicode_FromUnicode(start, len);
+}
+
+PyDoc_STRVAR(stringio_readline_doc,
+ "Read until newline or EOF.\n"
+ "\n"
+ "Returns an empty string if EOF is hit immediately.\n");
+
+static PyObject *
+stringio_readline(StringIOObject *self, PyObject *args)
+{
+ PyObject *arg = Py_None;
+ Py_ssize_t limit = -1;
+
+ CHECK_INITIALIZED(self);
+ if (!PyArg_ParseTuple(args, "|O:readline", &arg))
+ return NULL;
+ CHECK_CLOSED(self);
+
+ if (PyNumber_Check(arg)) {
+ limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
+ if (limit == -1 && PyErr_Occurred())
+ return NULL;
+ }
+ else if (arg != Py_None) {
+ PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
+ Py_TYPE(arg)->tp_name);
+ return NULL;
+ }
+ return _stringio_readline(self, limit);
+}
+
+static PyObject *
+stringio_iternext(StringIOObject *self)
+{
+ PyObject *line;
+
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+
+ if (Py_TYPE(self) == &PyStringIO_Type) {
+ /* Skip method call overhead for speed */
+ line = _stringio_readline(self, -1);
+ }
+ else {
+ /* XXX is subclassing StringIO really supported? */
+ line = PyObject_CallMethodObjArgs((PyObject *)self,
+ _PyIO_str_readline, NULL);
+ if (line && !PyUnicode_Check(line)) {
+ PyErr_Format(PyExc_IOError,
+ "readline() should have returned an str object, "
+ "not '%.200s'", Py_TYPE(line)->tp_name);
+ Py_DECREF(line);
+ return NULL;
+ }
+ }
+
+ if (line == NULL)
+ return NULL;
+
+ if (PyUnicode_GET_SIZE(line) == 0) {
+ /* Reached EOF */
+ Py_DECREF(line);
+ return NULL;
+ }
+
+ return line;
+}
+
+PyDoc_STRVAR(stringio_truncate_doc,
+ "Truncate size to pos.\n"
+ "\n"
+ "The pos argument defaults to the current file position, as\n"
+ "returned by tell(). Imply an absolute seek to pos.\n"
+ "Returns the new absolute position.\n");
+
static PyObject *
stringio_truncate(StringIOObject *self, PyObject *args)
{
Py_ssize_t size;
PyObject *arg = Py_None;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
return NULL;
+ CHECK_CLOSED(self);
- if (PyLong_Check(arg)) {
- size = PyLong_AsSsize_t(arg);
+ if (PyNumber_Check(arg)) {
+ size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
if (size == -1 && PyErr_Occurred())
return NULL;
}
@@ -199,23 +386,34 @@ stringio_truncate(StringIOObject *self, PyObject *args)
}
if (size < self->string_size) {
- self->string_size = size;
if (resize_buffer(self, size) < 0)
return NULL;
+ self->string_size = size;
}
self->pos = size;
return PyLong_FromSsize_t(size);
}
+PyDoc_STRVAR(stringio_seek_doc,
+ "Change stream position.\n"
+ "\n"
+ "Seek to character offset pos relative to position indicated by whence:\n"
+ " 0 Start of stream (the default). pos should be >= 0;\n"
+ " 1 Current position - pos must be 0;\n"
+ " 2 End of stream - pos must be 0.\n"
+ "Returns the new absolute position.\n");
+
static PyObject *
stringio_seek(StringIOObject *self, PyObject *args)
{
Py_ssize_t pos;
int mode = 0;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
return NULL;
+ CHECK_CLOSED(self);
if (mode != 0 && mode != 1 && mode != 2) {
PyErr_Format(PyExc_ValueError,
@@ -248,36 +446,76 @@ stringio_seek(StringIOObject *self, PyObject *args)
return PyLong_FromSsize_t(self->pos);
}
+PyDoc_STRVAR(stringio_write_doc,
+ "Write string to file.\n"
+ "\n"
+ "Returns the number of characters written, which is always equal to\n"
+ "the length of the string.\n");
+
static PyObject *
stringio_write(StringIOObject *self, PyObject *obj)
{
- const Py_UNICODE *str;
Py_ssize_t size;
- Py_ssize_t n = 0;
- if (PyUnicode_Check(obj)) {
- str = PyUnicode_AsUnicode(obj);
- size = PyUnicode_GetSize(obj);
- }
- else {
+ CHECK_INITIALIZED(self);
+ if (!PyUnicode_Check(obj)) {
PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
Py_TYPE(obj)->tp_name);
return NULL;
}
+ CHECK_CLOSED(self);
+ size = PyUnicode_GET_SIZE(obj);
- if (size != 0) {
- n = write_str(self, str, size);
- if (n < 0)
- return NULL;
- }
+ if (size > 0 && write_str(self, obj) < 0)
+ return NULL;
+
+ return PyLong_FromSsize_t(size);
+}
+
+PyDoc_STRVAR(stringio_close_doc,
+ "Close the IO object. Attempting any further operation after the\n"
+ "object is closed will raise a ValueError.\n"
+ "\n"
+ "This method has no effect if the file is already closed.\n");
+
+static PyObject *
+stringio_close(StringIOObject *self)
+{
+ self->closed = 1;
+ /* Free up some memory */
+ if (resize_buffer(self, 0) < 0)
+ return NULL;
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ Py_RETURN_NONE;
+}
+
+static int
+stringio_traverse(StringIOObject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->dict);
+ return 0;
+}
- return PyLong_FromSsize_t(n);
+static int
+stringio_clear(StringIOObject *self)
+{
+ Py_CLEAR(self->dict);
+ return 0;
}
static void
stringio_dealloc(StringIOObject *self)
{
- PyMem_Free(self->buf);
+ _PyObject_GC_UNTRACK(self);
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ if (self->buf)
+ PyMem_Free(self->buf);
+ if (self->weakreflist != NULL)
+ PyObject_ClearWeakRefs((PyObject *) self);
Py_TYPE(self)->tp_free(self);
}
@@ -303,19 +541,194 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)self;
}
+static int
+stringio_init(StringIOObject *self, PyObject *args, PyObject *kwds)
+{
+ char *kwlist[] = {"initial_value", "newline", NULL};
+ PyObject *value = NULL;
+ char *newline = "\n";
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oz:__init__", kwlist,
+ &value, &newline))
+ return -1;
+
+ if (newline && newline[0] != '\0'
+ && !(newline[0] == '\n' && newline[1] == '\0')
+ && !(newline[0] == '\r' && newline[1] == '\0')
+ && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
+ PyErr_Format(PyExc_ValueError,
+ "illegal newline value: %s", newline);
+ return -1;
+ }
+ if (value && value != Py_None && !PyUnicode_Check(value)) {
+ PyErr_Format(PyExc_ValueError,
+ "initial_value must be str or None, not %.200s",
+ Py_TYPE(value)->tp_name);
+ return -1;
+ }
+
+ self->ok = 0;
+
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+
+ if (newline) {
+ self->readnl = PyUnicode_FromString(newline);
+ if (self->readnl == NULL)
+ return -1;
+ }
+ self->readuniversal = (newline == NULL || newline[0] == '\0');
+ self->readtranslate = (newline == NULL);
+ /* If newline == "", we don't translate anything.
+ If newline == "\n" or newline == None, we translate to "\n", which is
+ a no-op.
+ (for newline == None, TextIOWrapper translates to os.sepline, but it
+ is pointless for StringIO)
+ */
+ if (newline != NULL && newline[0] == '\r') {
+ self->writenl = self->readnl;
+ Py_INCREF(self->writenl);
+ }
+
+ if (self->readuniversal) {
+ self->decoder = PyObject_CallFunction(
+ (PyObject *)&PyIncrementalNewlineDecoder_Type,
+ "Oi", Py_None, (int) self->readtranslate);
+ if (self->decoder == NULL)
+ return -1;
+ }
+
+ /* Now everything is set up, resize buffer to size of initial value,
+ and copy it */
+ self->string_size = 0;
+ if (value && value != Py_None) {
+ Py_ssize_t len = PyUnicode_GetSize(value);
+ /* This is a heuristic, for newline translation might change
+ the string length. */
+ if (resize_buffer(self, len) < 0)
+ return -1;
+ self->pos = 0;
+ if (write_str(self, value) < 0)
+ return -1;
+ }
+ else {
+ if (resize_buffer(self, 0) < 0)
+ return -1;
+ }
+ self->pos = 0;
+
+ self->closed = 0;
+ self->ok = 1;
+ return 0;
+}
+
+/* Properties and pseudo-properties */
+static PyObject *
+stringio_seekable(StringIOObject *self, PyObject *args)
+{
+ CHECK_INITIALIZED(self);
+ Py_RETURN_TRUE;
+}
+
+static PyObject *
+stringio_readable(StringIOObject *self, PyObject *args)
+{
+ CHECK_INITIALIZED(self);
+ Py_RETURN_TRUE;
+}
+
+static PyObject *
+stringio_writable(StringIOObject *self, PyObject *args)
+{
+ CHECK_INITIALIZED(self);
+ Py_RETURN_TRUE;
+}
+
+static PyObject *
+stringio_buffer(StringIOObject *self, void *context)
+{
+ PyErr_SetString(IO_STATE->unsupported_operation,
+ "buffer attribute is unsupported on type StringIO");
+ return NULL;
+}
+
+static PyObject *
+stringio_closed(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ return PyBool_FromLong(self->closed);
+}
+
+static PyObject *
+stringio_encoding(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+ return PyUnicode_FromString("utf-8");
+}
+
+static PyObject *
+stringio_errors(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+ return PyUnicode_FromString("strict");
+}
+
+static PyObject *
+stringio_line_buffering(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+ Py_RETURN_FALSE;
+}
+
+static PyObject *
+stringio_newlines(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+ if (self->decoder == NULL)
+ Py_RETURN_NONE;
+ return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
+}
+
static struct PyMethodDef stringio_methods[] = {
- {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
- {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL},
- {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL},
- {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
- {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL},
- {"write", (PyCFunction)stringio_write, METH_O, NULL},
+ {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
+ {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc},
+ {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
+ {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
+ {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
+ {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
+ {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
+ {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
+
+ {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
+ {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
+ {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
{NULL, NULL} /* sentinel */
};
-static PyTypeObject StringIO_Type = {
+static PyGetSetDef stringio_getset[] = {
+ {"closed", (getter)stringio_closed, NULL, NULL},
+ {"newlines", (getter)stringio_newlines, NULL, NULL},
+ /* (following comments straight off of the original Python wrapper:)
+ XXX Cruft to support the TextIOWrapper API. This would only
+ be meaningful if StringIO supported the buffer attribute.
+ Hopefully, a better solution, than adding these pseudo-attributes,
+ will be found.
+ */
+ {"buffer", (getter)stringio_buffer, NULL, NULL},
+ {"encoding", (getter)stringio_encoding, NULL, NULL},
+ {"errors", (getter)stringio_errors, NULL, NULL},
+ {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
+ {0}
+};
+
+PyTypeObject PyStringIO_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
- "_stringio._StringIO", /*tp_name*/
+ "_io.StringIO", /*tp_name*/
sizeof(StringIOObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)stringio_dealloc, /*tp_dealloc*/
@@ -333,51 +746,24 @@ static PyTypeObject StringIO_Type = {
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
- 0, /*tp_doc*/
- 0, /*tp_traverse*/
- 0, /*tp_clear*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
+ | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ stringio_doc, /*tp_doc*/
+ (traverseproc)stringio_traverse, /*tp_traverse*/
+ (inquiry)stringio_clear, /*tp_clear*/
0, /*tp_richcompare*/
- 0, /*tp_weaklistoffset*/
+ offsetof(StringIOObject, weakreflist), /*tp_weaklistoffset*/
0, /*tp_iter*/
- 0, /*tp_iternext*/
+ (iternextfunc)stringio_iternext, /*tp_iternext*/
stringio_methods, /*tp_methods*/
0, /*tp_members*/
- 0, /*tp_getset*/
+ stringio_getset, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
- 0, /*tp_dictoffset*/
- 0, /*tp_init*/
+ offsetof(StringIOObject, dict), /*tp_dictoffset*/
+ (initproc)stringio_init, /*tp_init*/
0, /*tp_alloc*/
stringio_new, /*tp_new*/
};
-
-static struct PyModuleDef _stringiomodule = {
- PyModuleDef_HEAD_INIT,
- "_stringio",
- NULL,
- -1,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
-};
-
-PyMODINIT_FUNC
-PyInit__stringio(void)
-{
- PyObject *m;
-
- if (PyType_Ready(&StringIO_Type) < 0)
- return NULL;
- m = PyModule_Create(&_stringiomodule);
- if (m == NULL)
- return NULL;
- Py_INCREF(&StringIO_Type);
- if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0)
- return NULL;
- return m;
-}