diff options
author | Alexandre Vassalotti <alexandre@peadrop.com> | 2008-06-11 22:58:36 (GMT) |
---|---|---|
committer | Alexandre Vassalotti <alexandre@peadrop.com> | 2008-06-11 22:58:36 (GMT) |
commit | 794652dd064590d7188d93d9bf524ae9e1558386 (patch) | |
tree | a0011f0cfa8fef8fcda771eceadad50e1f95b42d /Modules/_stringio.c | |
parent | 502d89ed1518687861563293cb761d268321fa4a (diff) | |
download | cpython-794652dd064590d7188d93d9bf524ae9e1558386.zip cpython-794652dd064590d7188d93d9bf524ae9e1558386.tar.gz cpython-794652dd064590d7188d93d9bf524ae9e1558386.tar.bz2 |
Issue 2918: Merge StringIO and cStringIO.
Diffstat (limited to 'Modules/_stringio.c')
-rw-r--r-- | Modules/_stringio.c | 379 |
1 files changed, 379 insertions, 0 deletions
diff --git a/Modules/_stringio.c b/Modules/_stringio.c new file mode 100644 index 0000000..83fc79e --- /dev/null +++ b/Modules/_stringio.c @@ -0,0 +1,379 @@ +#include "Python.h" + +/* This module is a stripped down version of _bytesio.c with a Py_UNICODE + buffer. Most of the functionality is provided by subclassing _StringIO. */ + + +typedef struct { + PyObject_HEAD + Py_UNICODE *buf; + Py_ssize_t pos; + Py_ssize_t string_size; + size_t buf_size; +} StringIOObject; + + +/* Internal routine for changing the size, in terms of characters, of the + buffer of StringIO objects. The caller should ensure that the 'size' + argument is non-negative. Returns 0 on success, -1 otherwise. */ +static int +resize_buffer(StringIOObject *self, size_t size) +{ + /* Here, unsigned types are used to avoid dealing with signed integer + overflow, which is undefined in C. */ + size_t alloc = self->buf_size; + Py_UNICODE *new_buf = NULL; + + assert(self->buf != NULL); + + /* For simplicity, stay in the range of the signed type. Anyway, Python + doesn't allow strings to be longer than this. */ + if (size > PY_SSIZE_T_MAX) + goto overflow; + + if (size < alloc / 2) { + /* Major downsize; resize down to exact size. */ + alloc = size + 1; + } + else if (size < alloc) { + /* Within allocated size; quick exit */ + return 0; + } + else if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } + + if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) + goto overflow; + new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, + alloc * sizeof(Py_UNICODE)); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = alloc; + self->buf = new_buf; + + return 0; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "new buffer size too large"); + return -1; +} + +/* Internal routine for writing a string of characters to the buffer of a + StringIO object. Returns the number of bytes wrote, or -1 on error. */ +static Py_ssize_t +write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len) +{ + assert(self->buf != NULL); + assert(self->pos >= 0); + assert(len >= 0); + + /* This overflow check is not strictly necessary. However, it avoids us to + deal with funky things like comparing an unsigned and a signed + integer. */ + if (self->pos > PY_SSIZE_T_MAX - len) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + return -1; + } + if (self->pos + len > self->string_size) { + if (resize_buffer(self, self->pos + len) < 0) + return -1; + } + + if (self->pos > self->string_size) { + /* In case of overseek, pad with null bytes the buffer region between + the end of stream and the current position. + + 0 lo string_size hi + | |<---used--->|<----------available----------->| + | | <--to pad-->|<---to write---> | + 0 buf positon + + */ + memset(self->buf + self->string_size, '\0', + (self->pos - self->string_size) * sizeof(Py_UNICODE)); + } + + /* Copy the data to the internal buffer, overwriting some of the + existing data if self->pos < self->string_size. */ + memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); + self->pos += len; + + /* Set the new length of the internal string if it has changed */ + if (self->string_size < self->pos) { + self->string_size = self->pos; + } + + return len; +} + +static PyObject * +stringio_getvalue(StringIOObject *self) +{ + return PyUnicode_FromUnicode(self->buf, self->string_size); +} + +static PyObject * +stringio_tell(StringIOObject *self) +{ + return PyLong_FromSsize_t(self->pos); +} + +static PyObject * +stringio_read(StringIOObject *self, PyObject *args) +{ + Py_ssize_t size, n; + Py_UNICODE *output; + PyObject *arg = Py_None; + + if (!PyArg_ParseTuple(args, "|O:read", &arg)) + return NULL; + + if (PyLong_Check(arg)) { + size = PyLong_AsSsize_t(arg); + } + else if (arg == Py_None) { + /* Read until EOF is reached, by default. */ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + assert(self->buf != NULL); + output = self->buf + self->pos; + self->pos += size; + + return PyUnicode_FromUnicode(output, size); +} + +static PyObject * +stringio_truncate(StringIOObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *arg = Py_None; + + if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) + return NULL; + + if (PyLong_Check(arg)) { + size = PyLong_AsSsize_t(arg); + } + else if (arg == Py_None) { + /* Truncate to current position if no argument is passed. */ + size = self->pos; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "Negative size value %zd", size); + return NULL; + } + + if (size < self->string_size) { + self->string_size = size; + if (resize_buffer(self, size) < 0) + return NULL; + } + self->pos = size; + + return PyLong_FromSsize_t(size); +} + +static PyObject * +stringio_seek(StringIOObject *self, PyObject *args) +{ + Py_ssize_t pos; + int mode = 0; + + if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) + return NULL; + + if (mode != 0 && mode != 1 && mode != 2) { + PyErr_Format(PyExc_ValueError, + "Invalid whence (%i, should be 0, 1 or 2)", mode); + return NULL; + } + else if (pos < 0 && mode == 0) { + PyErr_Format(PyExc_ValueError, + "Negative seek position %zd", pos); + return NULL; + } + else if (mode != 0 && pos != 0) { + PyErr_SetString(PyExc_IOError, + "Can't do nonzero cur-relative seeks"); + return NULL; + } + + /* mode 0: offset relative to beginning of the string. + mode 1: no change to current position. + mode 2: change position to end of file. */ + if (mode == 1) { + pos = self->pos; + } + else if (mode == 2) { + pos = self->string_size; + } + + self->pos = pos; + + return PyLong_FromSsize_t(self->pos); +} + +static PyObject * +stringio_write(StringIOObject *self, PyObject *obj) +{ + const Py_UNICODE *str; + Py_ssize_t size; + Py_ssize_t n = 0; + + if (PyUnicode_Check(obj)) { + str = PyUnicode_AsUnicode(obj); + size = PyUnicode_GetSize(obj); + } + else { + PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", + Py_TYPE(obj)->tp_name); + return NULL; + } + + if (size != 0) { + n = write_str(self, str, size); + if (n < 0) + return NULL; + } + + return PyLong_FromSsize_t(n); +} + +static void +stringio_dealloc(StringIOObject *self) +{ + PyMem_Free(self->buf); + Py_TYPE(self)->tp_free(self); +} + +static PyObject * +stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + StringIOObject *self; + + assert(type != NULL && type->tp_alloc != NULL); + self = (StringIOObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->string_size = 0; + self->pos = 0; + self->buf_size = 0; + self->buf = (Py_UNICODE *)PyMem_Malloc(0); + if (self->buf == NULL) { + Py_DECREF(self); + return PyErr_NoMemory(); + } + + return (PyObject *)self; +} + +static struct PyMethodDef stringio_methods[] = { + {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL}, + {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL}, + {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL}, + {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL}, + {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL}, + {"write", (PyCFunction)stringio_write, METH_O, NULL}, + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject StringIO_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_stringio._StringIO", /*tp_name*/ + sizeof(StringIOObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)stringio_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + stringio_methods, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + stringio_new, /*tp_new*/ +}; + +static struct PyModuleDef _stringiomodule = { + PyModuleDef_HEAD_INIT, + "_stringio", + NULL, + -1, + NULL, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__stringio(void) +{ + PyObject *m; + + if (PyType_Ready(&StringIO_Type) < 0) + return NULL; + m = PyModule_Create(&_stringiomodule); + if (m == NULL) + return NULL; + Py_INCREF(&StringIO_Type); + if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0) + return NULL; + return m; +} |