diff options
author | Alexandre Vassalotti <alexandre@peadrop.com> | 2008-05-09 21:49:43 (GMT) |
---|---|---|
committer | Alexandre Vassalotti <alexandre@peadrop.com> | 2008-05-09 21:49:43 (GMT) |
commit | 1aed624f7c6051bc670a846825bd40108d3f8dd5 (patch) | |
tree | bfb494b5e311d0af801388ba4df04b0e716187c1 /Modules/_bytesio.c | |
parent | 81673b7b63973bfe386ce6966a69d40cf9c277bc (diff) | |
download | cpython-1aed624f7c6051bc670a846825bd40108d3f8dd5.zip cpython-1aed624f7c6051bc670a846825bd40108d3f8dd5.tar.gz cpython-1aed624f7c6051bc670a846825bd40108d3f8dd5.tar.bz2 |
Backport fast alternate io.BytesIO implementation.
Merged r62778, r62779, r62802, r62806, r62807, r62808, r62809, r62844,
r62846, r62952, r62956.
Diffstat (limited to 'Modules/_bytesio.c')
-rw-r--r-- | Modules/_bytesio.c | 755 |
1 files changed, 755 insertions, 0 deletions
diff --git a/Modules/_bytesio.c b/Modules/_bytesio.c new file mode 100644 index 0000000..8c5bb82 --- /dev/null +++ b/Modules/_bytesio.c @@ -0,0 +1,755 @@ +#include "Python.h" + +typedef struct { + PyObject_HEAD + char *buf; + Py_ssize_t pos; + Py_ssize_t string_size; + size_t buf_size; +} BytesIOObject; + +#define CHECK_CLOSED(self) \ + if ((self)->buf == NULL) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on closed file."); \ + return NULL; \ + } + +/* Internal routine to get a line from the buffer of a BytesIO + object. Returns the length between the current position to the + next newline character. */ +static Py_ssize_t +get_line(BytesIOObject *self, char **output) +{ + char *n; + const char *str_end; + Py_ssize_t len; + + assert(self->buf != NULL); + + /* Move to the end of the line, up to the end of the string, s. */ + str_end = self->buf + self->string_size; + for (n = self->buf + self->pos; + n < str_end && *n != '\n'; + n++); + + /* Skip the newline character */ + if (n < str_end) + n++; + + /* Get the length from the current position to the end of the line. */ + len = n - (self->buf + self->pos); + *output = self->buf + self->pos; + + assert(len >= 0); + assert(self->pos < PY_SSIZE_T_MAX - len); + self->pos += len; + + return len; +} + +/* Internal routine for changing the size of the buffer of BytesIO objects. + The caller should ensure that the 'size' argument is non-negative. Returns + 0 on success, -1 otherwise. */ +static int +resize_buffer(BytesIOObject *self, size_t size) +{ + /* Here, unsigned types are used to avoid dealing with signed integer + overflow, which is undefined in C. */ + size_t alloc = self->buf_size; + char *new_buf = NULL; + + assert(self->buf != NULL); + + /* For simplicity, stay in the range of the signed type. Anyway, Python + doesn't allow strings to be longer than this. */ + if (size > PY_SSIZE_T_MAX) + goto overflow; + + if (size < alloc / 2) { + /* Major downsize; resize down to exact size. */ + alloc = size + 1; + } + else if (size < alloc) { + /* Within allocated size; quick exit */ + return 0; + } + else if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } + + if (alloc > ((size_t)-1) / sizeof(char)) + goto overflow; + new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char)); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = alloc; + self->buf = new_buf; + + return 0; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "new buffer size too large"); + return -1; +} + +/* Internal routine for writing a string of bytes to the buffer of a BytesIO + object. Returns the number of bytes wrote, or -1 on error. */ +static Py_ssize_t +write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len) +{ + assert(self->buf != NULL); + assert(self->pos >= 0); + assert(len >= 0); + + if ((size_t)self->pos + len > self->buf_size) { + if (resize_buffer(self, (size_t)self->pos + len) < 0) + return -1; + } + + if (self->pos > self->string_size) { + /* In case of overseek, pad with null bytes the buffer region between + the end of stream and the current position. + + 0 lo string_size hi + | |<---used--->|<----------available----------->| + | | <--to pad-->|<---to write---> | + 0 buf position + */ + memset(self->buf + self->string_size, '\0', + (self->pos - self->string_size) * sizeof(char)); + } + + /* Copy the data to the internal buffer, overwriting some of the existing + data if self->pos < self->string_size. */ + memcpy(self->buf + self->pos, bytes, len); + self->pos += len; + + /* Set the new length of the internal string if it has changed. */ + if (self->string_size < self->pos) { + self->string_size = self->pos; + } + + return len; +} + +static PyObject * +bytesio_get_closed(BytesIOObject *self) +{ + if (self->buf == NULL) + Py_RETURN_TRUE; + else + Py_RETURN_FALSE; +} + +/* Generic getter for the writable, readable and seekable properties */ +static PyObject * +return_true(BytesIOObject *self) +{ + Py_RETURN_TRUE; +} + +PyDoc_STRVAR(flush_doc, +"flush() -> None. Does nothing."); + +static PyObject * +bytesio_flush(BytesIOObject *self) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(getval_doc, +"getvalue() -> bytes.\n" +"\n" +"Retrieve the entire contents of the BytesIO object."); + +static PyObject * +bytesio_getvalue(BytesIOObject *self) +{ + CHECK_CLOSED(self); + return PyString_FromStringAndSize(self->buf, self->string_size); +} + +PyDoc_STRVAR(isatty_doc, +"isatty() -> False.\n" +"\n" +"Always returns False since BytesIO objects are not connected\n" +"to a tty-like device."); + +static PyObject * +bytesio_isatty(BytesIOObject *self) +{ + CHECK_CLOSED(self); + Py_RETURN_FALSE; +} + +PyDoc_STRVAR(tell_doc, +"tell() -> current file position, an integer\n"); + +static PyObject * +bytesio_tell(BytesIOObject *self) +{ + CHECK_CLOSED(self); + return PyInt_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(read_doc, +"read([size]) -> read at most size bytes, returned as a string.\n" +"\n" +"If the size argument is negative, read until EOF is reached.\n" +"Return an empty string at EOF."); + +static PyObject * +bytesio_read(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t size, n; + char *output; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:read", &arg)) + return NULL; + + if (PyInt_Check(arg)) { + size = PyInt_AsSsize_t(arg); + } + else if (arg == Py_None) { + /* Read until EOF is reached, by default. */ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + assert(self->buf != NULL); + output = self->buf + self->pos; + self->pos += size; + + return PyString_FromStringAndSize(output, size); +} + + +PyDoc_STRVAR(read1_doc, +"read1(size) -> read at most size bytes, returned as a string.\n" +"\n" +"If the size argument is negative or omitted, read until EOF is reached.\n" +"Return an empty string at EOF."); + +static PyObject * +bytesio_read1(BytesIOObject *self, PyObject *n) +{ + PyObject *arg, *res; + + arg = PyTuple_Pack(1, n); + if (arg == NULL) + return NULL; + res = bytesio_read(self, arg); + Py_DECREF(arg); + return res; +} + +PyDoc_STRVAR(readline_doc, +"readline([size]) -> next line from the file, as a string.\n" +"\n" +"Retain newline. A non-negative size argument limits the maximum\n" +"number of bytes to return (an incomplete line may be returned then).\n" +"Return an empty string at EOF.\n"); + +static PyObject * +bytesio_readline(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t size, n; + char *output; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:readline", &arg)) + return NULL; + + if (PyInt_Check(arg)) { + size = PyInt_AsSsize_t(arg); + } + else if (arg == Py_None) { + /* No size limit, by default. */ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + n = get_line(self, &output); + + if (size >= 0 && size < n) { + size = n - size; + n -= size; + self->pos -= size; + } + + return PyString_FromStringAndSize(output, n); +} + +PyDoc_STRVAR(readlines_doc, +"readlines([size]) -> list of strings, each a line from the file.\n" +"\n" +"Call readline() repeatedly and return a list of the lines so read.\n" +"The optional size argument, if given, is an approximate bound on the\n" +"total number of bytes in the lines returned.\n"); + +static PyObject * +bytesio_readlines(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t maxsize, size, n; + PyObject *result, *line; + char *output; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:readlines", &arg)) + return NULL; + + if (PyInt_Check(arg)) { + maxsize = PyInt_AsSsize_t(arg); + } + else if (arg == Py_None) { + /* No size limit, by default. */ + maxsize = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + size = 0; + result = PyList_New(0); + if (!result) + return NULL; + + while ((n = get_line(self, &output)) != 0) { + line = PyString_FromStringAndSize(output, n); + if (!line) + goto on_error; + if (PyList_Append(result, line) == -1) { + Py_DECREF(line); + goto on_error; + } + Py_DECREF(line); + size += n; + if (maxsize > 0 && size >= maxsize) + break; + } + return result; + + on_error: + Py_DECREF(result); + return NULL; +} + +PyDoc_STRVAR(readinto_doc, +"readinto(bytearray) -> int. Read up to len(b) bytes into b.\n" +"\n" +"Returns number of bytes read (0 for EOF), or None if the object\n" +"is set not to block as has no data to read."); + +static PyObject * +bytesio_readinto(BytesIOObject *self, PyObject *buffer) +{ + void *raw_buffer; + Py_ssize_t len; + + CHECK_CLOSED(self); + + if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1) + return NULL; + + if (self->pos + len > self->string_size) + len = self->string_size - self->pos; + + memcpy(raw_buffer, self->buf + self->pos, len); + assert(self->pos + len < PY_SSIZE_T_MAX); + assert(len >= 0); + self->pos += len; + + return PyInt_FromSsize_t(len); +} + +PyDoc_STRVAR(truncate_doc, +"truncate([size]) -> int. Truncate the file to at most size bytes.\n" +"\n" +"Size defaults to the current file position, as returned by tell().\n" +"Returns the new size. Imply an absolute seek to the position size."); + +static PyObject * +bytesio_truncate(BytesIOObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *arg = Py_None; + + CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) + return NULL; + + if (PyInt_Check(arg)) { + size = PyInt_AsSsize_t(arg); + } + else if (arg == Py_None) { + /* Truncate to current position if no argument is passed. */ + size = self->pos; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative size value %zd", size); + return NULL; + } + + if (size < self->string_size) { + self->string_size = size; + if (resize_buffer(self, size) < 0) + return NULL; + } + self->pos = size; + + return PyInt_FromSsize_t(size); +} + +static PyObject * +bytesio_iternext(BytesIOObject *self) +{ + char *next; + Py_ssize_t n; + + CHECK_CLOSED(self); + + n = get_line(self, &next); + + if (!next || n == 0) + return NULL; + + return PyString_FromStringAndSize(next, n); +} + +PyDoc_STRVAR(seek_doc, +"seek(pos, whence=0) -> int. Change stream position.\n" +"\n" +"Seek to byte offset pos relative to position indicated by whence:\n" +" 0 Start of stream (the default). pos should be >= 0;\n" +" 1 Current position - pos may be negative;\n" +" 2 End of stream - pos usually negative.\n" +"Returns the new absolute position."); + +static PyObject * +bytesio_seek(BytesIOObject *self, PyObject *args) +{ + PyObject *pos_obj, *mode_obj; + Py_ssize_t pos; + int mode = 0; + + CHECK_CLOSED(self); + + /* Special-case for 2.x to prevent floats from passing through. + This only needed to make a test in test_io succeed. */ + if (!PyArg_UnpackTuple(args, "seek", 1, 2, &pos_obj, &mode_obj)) + return NULL; + if (PyFloat_Check(pos_obj)) { + PyErr_SetString(PyExc_TypeError, + "position argument must be an integer"); + return NULL; + } + + if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) + return NULL; + + if (pos < 0 && mode == 0) { + PyErr_Format(PyExc_ValueError, + "negative seek value %zd", pos); + return NULL; + } + + /* mode 0: offset relative to beginning of the string. + mode 1: offset relative to current position. + mode 2: offset relative the end of the string. */ + if (mode == 1) { + if (pos > PY_SSIZE_T_MAX - self->pos) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + return NULL; + } + pos += self->pos; + } + else if (mode == 2) { + if (pos > PY_SSIZE_T_MAX - self->string_size) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + return NULL; + } + pos += self->string_size; + } + else if (mode != 0) { + PyErr_Format(PyExc_ValueError, + "invalid whence (%i, should be 0, 1 or 2)", mode); + return NULL; + } + + if (pos < 0) + pos = 0; + self->pos = pos; + + return PyInt_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(write_doc, +"write(bytes) -> int. Write bytes to file.\n" +"\n" +"Return the number of bytes written."); + +static PyObject * +bytesio_write(BytesIOObject *self, PyObject *obj) +{ + const char *bytes; + Py_ssize_t size; + Py_ssize_t n = 0; + + CHECK_CLOSED(self); + + /* Special-case in 2.x to prevent unicode objects to pass through. */ + if (PyUnicode_Check(obj)) { + PyErr_SetString(PyExc_TypeError, + "expecting a bytes object, got unicode"); + return NULL; + } + + if (PyObject_AsReadBuffer(obj, (void *)&bytes, &size) < 0) + return NULL; + + if (size != 0) { + n = write_bytes(self, bytes, size); + if (n < 0) + return NULL; + } + + return PyInt_FromSsize_t(n); +} + +PyDoc_STRVAR(writelines_doc, +"writelines(sequence_of_strings) -> None. Write strings to the file.\n" +"\n" +"Note that newlines are not added. The sequence can be any iterable\n" +"object producing strings. This is equivalent to calling write() for\n" +"each string."); + +static PyObject * +bytesio_writelines(BytesIOObject *self, PyObject *v) +{ + PyObject *it, *item; + PyObject *ret; + + CHECK_CLOSED(self); + + it = PyObject_GetIter(v); + if (it == NULL) + return NULL; + + while ((item = PyIter_Next(it)) != NULL) { + ret = bytesio_write(self, item); + Py_DECREF(item); + if (ret == NULL) { + Py_DECREF(it); + return NULL; + } + Py_DECREF(ret); + } + Py_DECREF(it); + + /* See if PyIter_Next failed */ + if (PyErr_Occurred()) + return NULL; + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(close_doc, +"close() -> None. Disable all I/O operations."); + +static PyObject * +bytesio_close(BytesIOObject *self) +{ + if (self->buf != NULL) { + PyMem_Free(self->buf); + self->buf = NULL; + } + Py_RETURN_NONE; +} + +static void +bytesio_dealloc(BytesIOObject *self) +{ + if (self->buf != NULL) { + PyMem_Free(self->buf); + self->buf = NULL; + } + Py_TYPE(self)->tp_free(self); +} + +static PyObject * +bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + BytesIOObject *self; + + assert(type != NULL && type->tp_alloc != NULL); + self = (BytesIOObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->string_size = 0; + self->pos = 0; + self->buf_size = 0; + self->buf = (char *)PyMem_Malloc(0); + if (self->buf == NULL) { + Py_DECREF(self); + return PyErr_NoMemory(); + } + + return (PyObject *)self; +} + +static int +bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *initvalue = NULL; + + if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue)) + return -1; + + /* In case, __init__ is called multiple times. */ + self->string_size = 0; + self->pos = 0; + + if (initvalue && initvalue != Py_None) { + PyObject *res; + res = bytesio_write(self, initvalue); + if (res == NULL) + return -1; + Py_DECREF(res); + self->pos = 0; + } + + return 0; +} + +static PyGetSetDef bytesio_getsetlist[] = { + {"closed", (getter)bytesio_get_closed, NULL, + "True if the file is closed."}, + {0}, /* sentinel */ +}; + +static struct PyMethodDef bytesio_methods[] = { + {"readable", (PyCFunction)return_true, METH_NOARGS, NULL}, + {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL}, + {"writable", (PyCFunction)return_true, METH_NOARGS, NULL}, + {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc}, + {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc}, + {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc}, + {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc}, + {"write", (PyCFunction)bytesio_write, METH_O, write_doc}, + {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc}, + {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc}, + {"readinto", (PyCFunction)bytesio_readinto, METH_O, readinto_doc}, + {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, + {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, + {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, + {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc}, + {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, + {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyDoc_STRVAR(bytesio_doc, +"BytesIO([buffer]) -> object\n" +"\n" +"Create a buffered I/O implementation using an in-memory bytes\n" +"buffer, ready for reading and writing."); + +static PyTypeObject BytesIO_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_bytesio._BytesIO", /*tp_name*/ + sizeof(BytesIOObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)bytesio_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + bytesio_doc, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + PyObject_SelfIter, /*tp_iter*/ + (iternextfunc)bytesio_iternext, /*tp_iternext*/ + bytesio_methods, /*tp_methods*/ + 0, /*tp_members*/ + bytesio_getsetlist, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + (initproc)bytesio_init, /*tp_init*/ + 0, /*tp_alloc*/ + bytesio_new, /*tp_new*/ +}; + +PyMODINIT_FUNC +init_bytesio(void) +{ + PyObject *m; + + if (PyType_Ready(&BytesIO_Type) < 0) + return; + m = Py_InitModule("_bytesio", NULL); + if (m == NULL) + return; + Py_INCREF(&BytesIO_Type); + PyModule_AddObject(m, "_BytesIO", (PyObject *)&BytesIO_Type); +} |