diff options
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_io/_iomodule.c | 735 | ||||
-rw-r--r-- | Modules/_io/_iomodule.h | 146 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 2289 | ||||
-rw-r--r-- | Modules/_io/bytesio.c (renamed from Modules/_bytesio.c) | 199 | ||||
-rw-r--r-- | Modules/_io/fileio.c (renamed from Modules/_fileio.c) | 415 | ||||
-rw-r--r-- | Modules/_io/iobase.c | 894 | ||||
-rw-r--r-- | Modules/_io/stringio.c | 756 | ||||
-rw-r--r-- | Modules/_io/textio.c | 2606 |
8 files changed, 7793 insertions, 247 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c new file mode 100644 index 0000000..7b43022 --- /dev/null +++ b/Modules/_io/_iomodule.c @@ -0,0 +1,735 @@ +/* + An implementation of the new I/O lib as defined by PEP 3116 - "New I/O" + + Classes defined here: UnsupportedOperation, BlockingIOError. + Functions defined here: open(). + + Mostly written by Amaury Forgeot d'Arc +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif /* HAVE_SYS_TYPES_H */ + +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif /* HAVE_SYS_STAT_H */ + + +/* Various interned strings */ + +PyObject *_PyIO_str_close; +PyObject *_PyIO_str_closed; +PyObject *_PyIO_str_decode; +PyObject *_PyIO_str_encode; +PyObject *_PyIO_str_fileno; +PyObject *_PyIO_str_flush; +PyObject *_PyIO_str_getstate; +PyObject *_PyIO_str_isatty; +PyObject *_PyIO_str_newlines; +PyObject *_PyIO_str_nl; +PyObject *_PyIO_str_read; +PyObject *_PyIO_str_read1; +PyObject *_PyIO_str_readable; +PyObject *_PyIO_str_readinto; +PyObject *_PyIO_str_readline; +PyObject *_PyIO_str_reset; +PyObject *_PyIO_str_seek; +PyObject *_PyIO_str_seekable; +PyObject *_PyIO_str_setstate; +PyObject *_PyIO_str_tell; +PyObject *_PyIO_str_truncate; +PyObject *_PyIO_str_writable; +PyObject *_PyIO_str_write; + +PyObject *_PyIO_empty_str; +PyObject *_PyIO_empty_bytes; +PyObject *_PyIO_zero; + + +PyDoc_STRVAR(module_doc, +"The io module provides the Python interfaces to stream handling. The\n" +"builtin open function is defined in this module.\n" +"\n" +"At the top of the I/O hierarchy is the abstract base class IOBase. It\n" +"defines the basic interface to a stream. 
Note, however, that there is no\n" +"separation between reading and writing to streams; implementations are\n" +"allowed to throw an IOError if they do not support a given operation.\n" +"\n" +"Extending IOBase is RawIOBase which deals simply with the reading and\n" +"writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide\n" +"an interface to OS files.\n" +"\n" +"BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its\n" +"subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer\n" +"streams that are readable, writable, and both respectively.\n" +"BufferedRandom provides a buffered interface to random access\n" +"streams. BytesIO is a simple stream of in-memory bytes.\n" +"\n" +"Another IOBase subclass, TextIOBase, deals with the encoding and decoding\n" +"of streams into text. TextIOWrapper, which extends it, is a buffered text\n" +"interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO\n" +"is an in-memory stream for text.\n" +"\n" +"Argument names are not part of the specification, and only the arguments\n" +"of open() are intended to be used as keyword arguments.\n" +"\n" +"data:\n" +"\n" +"DEFAULT_BUFFER_SIZE\n" +"\n" +" An int containing the default buffer size used by the module's buffered\n" +" I/O classes. 
open() uses the file's blksize (as obtained by os.stat) if\n" +" possible.\n" + ); + + +/* + * BlockingIOError extends IOError + */ + +static int +blockingioerror_init(PyBlockingIOErrorObject *self, PyObject *args, + PyObject *kwds) +{ + PyObject *myerrno = NULL, *strerror = NULL; + PyObject *baseargs = NULL; + Py_ssize_t written = 0; + + assert(PyTuple_Check(args)); + + self->written = 0; + if (!PyArg_ParseTuple(args, "OO|n:BlockingIOError", + &myerrno, &strerror, &written)) + return -1; + + baseargs = PyTuple_Pack(2, myerrno, strerror); + if (baseargs == NULL) + return -1; + /* This will take care of initializing of myerrno and strerror members */ + if (((PyTypeObject *)PyExc_IOError)->tp_init( + (PyObject *)self, baseargs, kwds) == -1) { + Py_DECREF(baseargs); + return -1; + } + Py_DECREF(baseargs); + + self->written = written; + return 0; +} + +static PyMemberDef blockingioerror_members[] = { + {"characters_written", T_PYSSIZET, offsetof(PyBlockingIOErrorObject, written), 0}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject _PyExc_BlockingIOError = { + PyVarObject_HEAD_INIT(NULL, 0) + "BlockingIOError", /*tp_name*/ + sizeof(PyBlockingIOErrorObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + PyDoc_STR("Exception raised when I/O would block " + "on a non-blocking I/O stream"), /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + blockingioerror_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* 
tp_dictoffset */ + (initproc)blockingioerror_init, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; +PyObject *PyExc_BlockingIOError = (PyObject *)&_PyExc_BlockingIOError; + + +/* + * The main open() function + */ +PyDoc_STRVAR(open_doc, +"Open file and return a stream. Raise IOError upon failure.\n" +"\n" +"file is either a text or byte string giving the name (and the path\n" +"if the file isn't in the current working directory) of the file to\n" +"be opened or an integer file descriptor of the file to be\n" +"wrapped. (If a file descriptor is given, it is closed when the\n" +"returned I/O object is closed, unless closefd is set to False.)\n" +"\n" +"mode is an optional string that specifies the mode in which the file\n" +"is opened. It defaults to 'r' which means open for reading in text\n" +"mode. Other common values are 'w' for writing (truncating the file if\n" +"it already exists), and 'a' for appending (which on some Unix systems,\n" +"means that all writes append to the end of the file regardless of the\n" +"current seek position). In text mode, if encoding is not specified the\n" +"encoding used is platform dependent. (For reading and writing raw\n" +"bytes use binary mode and leave encoding unspecified.) The available\n" +"modes are:\n" +"\n" +"========= ===============================================================\n" +"Character Meaning\n" +"--------- ---------------------------------------------------------------\n" +"'r' open for reading (default)\n" +"'w' open for writing, truncating the file first\n" +"'a' open for writing, appending to the end of the file if it exists\n" +"'b' binary mode\n" +"'t' text mode (default)\n" +"'+' open a disk file for updating (reading and writing)\n" +"'U' universal newline mode (for backwards compatibility; unneeded\n" +" for new code)\n" +"========= ===============================================================\n" +"\n" +"The default mode is 'rt' (open for reading text). 
For binary random\n" +"access, the mode 'w+b' opens and truncates the file to 0 bytes, while\n" +"'r+b' opens the file without truncation.\n" +"\n" +"Python distinguishes between files opened in binary and text modes,\n" +"even when the underlying operating system doesn't. Files opened in\n" +"binary mode (appending 'b' to the mode argument) return contents as\n" +"bytes objects without any decoding. In text mode (the default, or when\n" +"'t' is appended to the mode argument), the contents of the file are\n" +"returned as strings, the bytes having been first decoded using a\n" +"platform-dependent encoding or using the specified encoding if given.\n" +"\n" +"buffering is an optional integer used to set the buffering policy. By\n" +"default full buffering is on. Pass 0 to switch buffering off (only\n" +"allowed in binary mode), 1 to set line buffering, and an integer > 1\n" +"for full buffering.\n" +"\n" +"encoding is the name of the encoding used to decode or encode the\n" +"file. This should only be used in text mode. The default encoding is\n" +"platform dependent, but any encoding supported by Python can be\n" +"passed. See the codecs module for the list of supported encodings.\n" +"\n" +"errors is an optional string that specifies how encoding errors are to\n" +"be handled---this argument should not be used in binary mode. Pass\n" +"'strict' to raise a ValueError exception if there is an encoding error\n" +"(the default of None has the same effect), or pass 'ignore' to ignore\n" +"errors. (Note that ignoring encoding errors can lead to data loss.)\n" +"See the documentation for codecs.register for a list of the permitted\n" +"encoding error strings.\n" +"\n" +"newline controls how universal newlines works (it only applies to text\n" +"mode). It can be None, '', '\\n', '\\r', and '\\r\\n'. It works as\n" +"follows:\n" +"\n" +"* On input, if newline is None, universal newlines mode is\n" +" enabled. 
Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n" +" these are translated into '\\n' before being returned to the\n" +" caller. If it is '', universal newline mode is enabled, but line\n" +" endings are returned to the caller untranslated. If it has any of\n" +" the other legal values, input lines are only terminated by the given\n" +" string, and the line ending is returned to the caller untranslated.\n" +"\n" +"* On output, if newline is None, any '\\n' characters written are\n" +" translated to the system default line separator, os.linesep. If\n" +" newline is '', no translation takes place. If newline is any of the\n" +" other legal values, any '\\n' characters written are translated to\n" +" the given string.\n" +"\n" +"If closefd is False, the underlying file descriptor will be kept open\n" +"when the file is closed. This does not work when a file name is given\n" +"and must be True in that case.\n" +"\n" +"open() returns a file object whose type depends on the mode, and\n" +"through which the standard file operations such as reading and writing\n" +"are performed. When open() is used to open a file in a text mode ('w',\n" +"'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open\n" +"a file in a binary mode, the returned class varies: in read binary\n" +"mode, it returns a BufferedReader; in write binary and append binary\n" +"modes, it returns a BufferedWriter, and in read/write mode, it returns\n" +"a BufferedRandom.\n" +"\n" +"It is also possible to use a string or bytearray as a file for both\n" +"reading and writing. 
For strings StringIO can be used like a file\n" +"opened in a text mode, and for bytes a BytesIO can be used like a file\n" +"opened in a binary mode.\n" + ); + +/* Implementation of io.open(): validates the mode string and the keyword + arguments, creates the raw FileIO object, then wraps it in a Buffered* + object and, in text mode, in a TextIOWrapper. Returns a new reference, + or NULL with an exception set. */ +static PyObject * +io_open(PyObject *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"file", "mode", "buffering", + "encoding", "errors", "newline", + "closefd", NULL}; + PyObject *file; + char *mode = "r"; + int buffering = -1, closefd = 1; + char *encoding = NULL, *errors = NULL, *newline = NULL; + unsigned i; + + int reading = 0, writing = 0, appending = 0, updating = 0; + int text = 0, binary = 0, universal = 0; + + char rawmode[5], *m; + int line_buffering, isatty; + + PyObject *raw, *modeobj = NULL, *buffer = NULL, *wrapper = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|sizzzi:open", kwlist, + &file, &mode, &buffering, + &encoding, &errors, &newline, + &closefd)) { + return NULL; + } + + if (!PyUnicode_Check(file) && + !PyBytes_Check(file) && + !PyNumber_Check(file)) { + PyObject *repr = PyObject_Repr(file); + if (repr != NULL) { + PyErr_Format(PyExc_TypeError, "invalid file: %s", + PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + return NULL; + } + + /* Decode mode */ + for (i = 0; i < strlen(mode); i++) { + char c = mode[i]; + + switch (c) { + case 'r': + reading = 1; + break; + case 'w': + writing = 1; + break; + case 'a': + appending = 1; + break; + case '+': + updating = 1; + break; + case 't': + text = 1; + break; + case 'b': + binary = 1; + break; + case 'U': + universal = 1; + reading = 1; + break; + default: + goto invalid_mode; + } + + /* c must not be duplicated */ + if (strchr(mode+i+1, c)) { + invalid_mode: + PyErr_Format(PyExc_ValueError, "invalid mode: '%s'", mode); + return NULL; + } + + } + + m = rawmode; + if (reading) *(m++) = 'r'; + if (writing) *(m++) = 'w'; + if (appending) *(m++) = 'a'; + if (updating) *(m++) = '+'; + *m = '\0'; + + /* Parameters validation */ + if (universal) { + if (writing || appending) { + PyErr_SetString(PyExc_ValueError, + 
"can't use U and writing mode at once"); + return NULL; + } + reading = 1; + } + + if (text && binary) { + PyErr_SetString(PyExc_ValueError, + "can't have text and binary mode at once"); + return NULL; + } + + if (reading + writing + appending > 1) { + PyErr_SetString(PyExc_ValueError, + "must have exactly one of read/write/append mode"); + return NULL; + } + + if (binary && encoding != NULL) { + PyErr_SetString(PyExc_ValueError, + "binary mode doesn't take an encoding argument"); + return NULL; + } + + if (binary && errors != NULL) { + PyErr_SetString(PyExc_ValueError, + "binary mode doesn't take an errors argument"); + return NULL; + } + + if (binary && newline != NULL) { + PyErr_SetString(PyExc_ValueError, + "binary mode doesn't take a newline argument"); + return NULL; + } + + /* Create the Raw file stream */ + raw = PyObject_CallFunction((PyObject *)&PyFileIO_Type, + "Osi", file, rawmode, closefd); + if (raw == NULL) + return NULL; + + modeobj = PyUnicode_FromString(mode); + if (modeobj == NULL) + goto error; + + /* buffering */ + { + PyObject *res = PyObject_CallMethod(raw, "isatty", NULL); + if (res == NULL) + goto error; + isatty = PyLong_AsLong(res); + Py_DECREF(res); + if (isatty == -1 && PyErr_Occurred()) + goto error; + } + + if (buffering == 1 || (buffering < 0 && isatty)) { + buffering = -1; + line_buffering = 1; + } + else + line_buffering = 0; + + if (buffering < 0) { + buffering = DEFAULT_BUFFER_SIZE; +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + { + struct stat st; + long fileno; + PyObject *res = PyObject_CallMethod(raw, "fileno", NULL); + if (res == NULL) + goto error; + + fileno = PyInt_AsLong(res); + Py_DECREF(res); + if (fileno == -1 && PyErr_Occurred()) + goto error; + + /* Only trust a usable block size: a reported value of 0 would + make the checks below treat the file as unbuffered, and 1 + would give a one-byte buffer. Otherwise keep the default. */ + if (fstat(fileno, &st) >= 0 && st.st_blksize > 1) + buffering = st.st_blksize; + } +#endif + } + if (buffering < 0) { + PyErr_SetString(PyExc_ValueError, + "invalid buffering size"); + goto error; + } + + /* if not buffering, returns the raw file object */ + if (buffering == 0) { + if (!binary) { + 
PyErr_SetString(PyExc_ValueError, + "can't have unbuffered text I/O"); + goto error; + } + + Py_DECREF(modeobj); + return raw; + } + + /* wraps into a buffered file */ + { + PyObject *Buffered_class; + + if (updating) + Buffered_class = (PyObject *)&PyBufferedRandom_Type; + else if (writing || appending) + Buffered_class = (PyObject *)&PyBufferedWriter_Type; + else if (reading) + Buffered_class = (PyObject *)&PyBufferedReader_Type; + else { + PyErr_Format(PyExc_ValueError, + "unknown mode: '%s'", mode); + goto error; + } + + buffer = PyObject_CallFunction(Buffered_class, "Oi", raw, buffering); + } + Py_CLEAR(raw); + if (buffer == NULL) + goto error; + + + /* if binary, returns the buffered file */ + if (binary) { + Py_DECREF(modeobj); + return buffer; + } + + /* wraps into a TextIOWrapper */ + wrapper = PyObject_CallFunction((PyObject *)&PyTextIOWrapper_Type, + "Osssi", + buffer, + encoding, errors, newline, + line_buffering); + Py_CLEAR(buffer); + if (wrapper == NULL) + goto error; + + if (PyObject_SetAttrString(wrapper, "mode", modeobj) < 0) + goto error; + Py_DECREF(modeobj); + return wrapper; + + error: + Py_XDECREF(raw); + Py_XDECREF(modeobj); + Py_XDECREF(buffer); + Py_XDECREF(wrapper); + return NULL; +} + +/* + * Private helpers for the io module. + */ + +Py_off_t +PyNumber_AsOff_t(PyObject *item, PyObject *err) +{ + Py_off_t result; + PyObject *runerr; + PyObject *value = PyNumber_Index(item); + if (value == NULL) + return -1; + + if (PyInt_Check(value)) { + /* We assume a long always fits in a Py_off_t... */ + result = (Py_off_t) PyInt_AS_LONG(value); + goto finish; + } + + /* We're done if PyLong_AsOff_t() returns without error. 
*/ + result = PyLong_AsOff_t(value); + if (result != -1 || !(runerr = PyErr_Occurred())) + goto finish; + + /* Error handling code -- only manage OverflowError differently */ + if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) + goto finish; + + PyErr_Clear(); + /* If no error-handling desired then the default clipping + is sufficient. + */ + if (!err) { + assert(PyLong_Check(value)); + /* Whether or not it is less than or equal to + zero is determined by the sign of ob_size + */ + if (_PyLong_Sign(value) < 0) + result = PY_OFF_T_MIN; + else + result = PY_OFF_T_MAX; + } + else { + /* Otherwise replace the error with caller's error object. */ + PyErr_Format(err, + "cannot fit '%.200s' into an offset-sized integer", + item->ob_type->tp_name); + } + + finish: + Py_DECREF(value); + return result; +} + + +/* + * Module definition + */ + +PyObject *_PyIO_os_module = NULL; +PyObject *_PyIO_locale_module = NULL; +PyObject *_PyIO_unsupported_operation = NULL; + +static PyMethodDef module_methods[] = { + {"open", (PyCFunction)io_open, METH_VARARGS|METH_KEYWORDS, open_doc}, + {NULL, NULL} +}; + +PyMODINIT_FUNC +init_io(void) +{ + PyObject *m = Py_InitModule4("_io", module_methods, + module_doc, NULL, PYTHON_API_VERSION); + if (m == NULL) + return; + + /* put os in the module state */ + _PyIO_os_module = PyImport_ImportModule("os"); + if (_PyIO_os_module == NULL) + goto fail; + +#define ADD_TYPE(type, name) \ + if (PyType_Ready(type) < 0) \ + goto fail; \ + Py_INCREF(type); \ + if (PyModule_AddObject(m, name, (PyObject *)type) < 0) { \ + Py_DECREF(type); \ + goto fail; \ + } + + /* DEFAULT_BUFFER_SIZE */ + if (PyModule_AddIntMacro(m, DEFAULT_BUFFER_SIZE) < 0) + goto fail; + + /* UnsupportedOperation inherits from ValueError and IOError */ + _PyIO_unsupported_operation = PyObject_CallFunction( + (PyObject *)&PyType_Type, "s(OO){}", + "UnsupportedOperation", PyExc_ValueError, PyExc_IOError); + if (_PyIO_unsupported_operation == NULL) + goto fail; + 
Py_INCREF(_PyIO_unsupported_operation); + if (PyModule_AddObject(m, "UnsupportedOperation", + _PyIO_unsupported_operation) < 0) + goto fail; + + /* BlockingIOError */ + _PyExc_BlockingIOError.tp_base = (PyTypeObject *) PyExc_IOError; + ADD_TYPE(&_PyExc_BlockingIOError, "BlockingIOError"); + + /* Concrete base types of the IO ABCs. + (the ABCs themselves are declared through inheritance in io.py) + */ + ADD_TYPE(&PyIOBase_Type, "_IOBase"); + ADD_TYPE(&PyRawIOBase_Type, "_RawIOBase"); + ADD_TYPE(&PyBufferedIOBase_Type, "_BufferedIOBase"); + ADD_TYPE(&PyTextIOBase_Type, "_TextIOBase"); + + /* Implementation of concrete IO objects. */ + /* FileIO */ + PyFileIO_Type.tp_base = &PyRawIOBase_Type; + ADD_TYPE(&PyFileIO_Type, "FileIO"); + + /* BytesIO */ + PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBytesIO_Type, "BytesIO"); + + /* StringIO */ + PyStringIO_Type.tp_base = &PyTextIOBase_Type; + ADD_TYPE(&PyStringIO_Type, "StringIO"); + + /* BufferedReader */ + PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedReader_Type, "BufferedReader"); + + /* BufferedWriter */ + PyBufferedWriter_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedWriter_Type, "BufferedWriter"); + + /* BufferedRWPair */ + PyBufferedRWPair_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedRWPair_Type, "BufferedRWPair"); + + /* BufferedRandom */ + PyBufferedRandom_Type.tp_base = &PyBufferedIOBase_Type; + ADD_TYPE(&PyBufferedRandom_Type, "BufferedRandom"); + + /* TextIOWrapper */ + PyTextIOWrapper_Type.tp_base = &PyTextIOBase_Type; + ADD_TYPE(&PyTextIOWrapper_Type, "TextIOWrapper"); + + /* IncrementalNewlineDecoder */ + ADD_TYPE(&PyIncrementalNewlineDecoder_Type, "IncrementalNewlineDecoder"); + + /* Interned strings */ + if (!(_PyIO_str_close = PyString_InternFromString("close"))) + goto fail; + if (!(_PyIO_str_closed = PyString_InternFromString("closed"))) + goto fail; + if (!(_PyIO_str_decode = PyString_InternFromString("decode"))) + goto 
fail; + if (!(_PyIO_str_encode = PyString_InternFromString("encode"))) + goto fail; + if (!(_PyIO_str_fileno = PyString_InternFromString("fileno"))) + goto fail; + if (!(_PyIO_str_flush = PyString_InternFromString("flush"))) + goto fail; + if (!(_PyIO_str_getstate = PyString_InternFromString("getstate"))) + goto fail; + if (!(_PyIO_str_isatty = PyString_InternFromString("isatty"))) + goto fail; + if (!(_PyIO_str_newlines = PyString_InternFromString("newlines"))) + goto fail; + if (!(_PyIO_str_nl = PyString_InternFromString("\n"))) + goto fail; + if (!(_PyIO_str_read = PyString_InternFromString("read"))) + goto fail; + if (!(_PyIO_str_read1 = PyString_InternFromString("read1"))) + goto fail; + if (!(_PyIO_str_readable = PyString_InternFromString("readable"))) + goto fail; + if (!(_PyIO_str_readinto = PyString_InternFromString("readinto"))) + goto fail; + if (!(_PyIO_str_readline = PyString_InternFromString("readline"))) + goto fail; + if (!(_PyIO_str_reset = PyString_InternFromString("reset"))) + goto fail; + if (!(_PyIO_str_seek = PyString_InternFromString("seek"))) + goto fail; + if (!(_PyIO_str_seekable = PyString_InternFromString("seekable"))) + goto fail; + if (!(_PyIO_str_setstate = PyString_InternFromString("setstate"))) + goto fail; + if (!(_PyIO_str_tell = PyString_InternFromString("tell"))) + goto fail; + if (!(_PyIO_str_truncate = PyString_InternFromString("truncate"))) + goto fail; + if (!(_PyIO_str_write = PyString_InternFromString("write"))) + goto fail; + if (!(_PyIO_str_writable = PyString_InternFromString("writable"))) + goto fail; + + if (!(_PyIO_empty_str = PyUnicode_FromStringAndSize(NULL, 0))) + goto fail; + if (!(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0))) + goto fail; + if (!(_PyIO_zero = PyLong_FromLong(0L))) + goto fail; + + return; + + fail: + Py_CLEAR(_PyIO_os_module); + Py_CLEAR(_PyIO_unsupported_operation); + Py_DECREF(m); +} diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h new file mode 100644 index 
0000000..e220ec1 --- /dev/null +++ b/Modules/_io/_iomodule.h @@ -0,0 +1,146 @@ +/* + * Declarations shared between the different parts of the io module + */ + +/* ABCs */ +extern PyTypeObject PyIOBase_Type; +extern PyTypeObject PyRawIOBase_Type; +extern PyTypeObject PyBufferedIOBase_Type; +extern PyTypeObject PyTextIOBase_Type; + +/* Concrete classes */ +extern PyTypeObject PyFileIO_Type; +extern PyTypeObject PyBytesIO_Type; +extern PyTypeObject PyStringIO_Type; +extern PyTypeObject PyBufferedReader_Type; +extern PyTypeObject PyBufferedWriter_Type; +extern PyTypeObject PyBufferedRWPair_Type; +extern PyTypeObject PyBufferedRandom_Type; +extern PyTypeObject PyTextIOWrapper_Type; +extern PyTypeObject PyIncrementalNewlineDecoder_Type; + +/* These functions are used as METH_NOARGS methods, are normally called + * with args=NULL, and return a new reference. + * BUT when args=Py_True is passed, they return a borrowed reference. + */ +extern PyObject* _PyIOBase_check_readable(PyObject *self, PyObject *args); +extern PyObject* _PyIOBase_check_writable(PyObject *self, PyObject *args); +extern PyObject* _PyIOBase_check_seekable(PyObject *self, PyObject *args); +extern PyObject* _PyIOBase_check_closed(PyObject *self, PyObject *args); + +/* Helper for finalization. + This function will revive an object ready to be deallocated and try to + close() it. It returns 0 if the object can be destroyed, or -1 if it + is alive again. */ +extern int _PyIOBase_finalize(PyObject *self); + +/* Returns true if the given FileIO object is closed. + Doesn't check the argument type, so be careful! */ +extern int _PyFileIO_closed(PyObject *self); + +/* Shortcut to the core of the IncrementalNewlineDecoder.decode method */ +extern PyObject *_PyIncrementalNewlineDecoder_decode( + PyObject *self, PyObject *input, int final); + +/* Finds the first line ending between `start` and `end`. + If found, returns the index after the line ending and doesn't touch + `*consumed`. 
+ If not found, returns -1 and sets `*consumed` to the number of characters + which can be safely put aside until another search. + + NOTE: for performance reasons, `end` must point to a NUL character ('\0'). + Otherwise, the function will scan further and return garbage. */ +extern Py_ssize_t _PyIO_find_line_ending( + int translated, int universal, PyObject *readnl, + Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed); + + +#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ + +typedef struct { + /* This is the equivalent of PyException_HEAD in 3.x */ + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + + PyObject *myerrno; + PyObject *strerror; + PyObject *filename; /* Not used, but part of the IOError object */ + Py_ssize_t written; +} PyBlockingIOErrorObject; +PyAPI_DATA(PyObject *) PyExc_BlockingIOError; + +/* + * Offset type for positioning. + */ + +#if defined(MS_WIN64) || defined(MS_WINDOWS) + +/* Windows uses long long for offsets */ +typedef PY_LONG_LONG Py_off_t; +# define PyLong_AsOff_t PyLong_AsLongLong +# define PyLong_FromOff_t PyLong_FromLongLong +# define PY_OFF_T_MAX PY_LLONG_MAX +# define PY_OFF_T_MIN PY_LLONG_MIN + +#else + +/* Other platforms use off_t */ +typedef off_t Py_off_t; +#if (SIZEOF_OFF_T == SIZEOF_SIZE_T) +# define PyLong_AsOff_t PyLong_AsSsize_t +# define PyLong_FromOff_t PyLong_FromSsize_t +# define PY_OFF_T_MAX PY_SSIZE_T_MAX +# define PY_OFF_T_MIN PY_SSIZE_T_MIN +#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG) +# define PyLong_AsOff_t PyLong_AsLongLong +# define PyLong_FromOff_t PyLong_FromLongLong +# define PY_OFF_T_MAX PY_LLONG_MAX +# define PY_OFF_T_MIN PY_LLONG_MIN +#elif (SIZEOF_OFF_T == SIZEOF_LONG) +# define PyLong_AsOff_t PyLong_AsLong +# define PyLong_FromOff_t PyLong_FromLong +# define PY_OFF_T_MAX LONG_MAX +# define PY_OFF_T_MIN LONG_MIN +#else +# error off_t does not match either size_t, long, or long long! 
+#endif + +#endif + +extern Py_off_t PyNumber_AsOff_t(PyObject *item, PyObject *err); + +/* Implementation details */ + +extern PyObject *_PyIO_os_module; +extern PyObject *_PyIO_locale_module; +extern PyObject *_PyIO_unsupported_operation; + +extern PyObject *_PyIO_str_close; +extern PyObject *_PyIO_str_closed; +extern PyObject *_PyIO_str_decode; +extern PyObject *_PyIO_str_encode; +extern PyObject *_PyIO_str_fileno; +extern PyObject *_PyIO_str_flush; +extern PyObject *_PyIO_str_getstate; +extern PyObject *_PyIO_str_isatty; +extern PyObject *_PyIO_str_newlines; +extern PyObject *_PyIO_str_nl; +extern PyObject *_PyIO_str_read; +extern PyObject *_PyIO_str_read1; +extern PyObject *_PyIO_str_readable; +extern PyObject *_PyIO_str_readinto; +extern PyObject *_PyIO_str_readline; +extern PyObject *_PyIO_str_reset; +extern PyObject *_PyIO_str_seek; +extern PyObject *_PyIO_str_seekable; +extern PyObject *_PyIO_str_setstate; +extern PyObject *_PyIO_str_tell; +extern PyObject *_PyIO_str_truncate; +extern PyObject *_PyIO_str_writable; +extern PyObject *_PyIO_str_write; + +extern PyObject *_PyIO_empty_str; +extern PyObject *_PyIO_empty_bytes; +extern PyObject *_PyIO_zero; diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c new file mode 100644 index 0000000..554e113 --- /dev/null +++ b/Modules/_io/bufferedio.c @@ -0,0 +1,2289 @@ +/* + An implementation of Buffered I/O as defined by PEP 3116 - "New I/O" + + Classes defined here: BufferedIOBase, BufferedReader, BufferedWriter, + BufferedRandom. + + Written by Amaury Forgeot d'Arc and Antoine Pitrou +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "pythread.h" +#include "_iomodule.h" + +/* + * BufferedIOBase class, inherits from IOBase. 
+ */ +PyDoc_STRVAR(bufferediobase_doc, + "Base class for buffered IO objects.\n" + "\n" + "The main difference with RawIOBase is that the read() method\n" + "supports omitting the size argument, and does not have a default\n" + "implementation that defers to readinto().\n" + "\n" + "In addition, read(), readinto() and write() may raise\n" + "BlockingIOError if the underlying raw stream is in non-blocking\n" + "mode and not ready; unlike their raw counterparts, they will never\n" + "return None.\n" + "\n" + "A typical implementation should not inherit from a RawIOBase\n" + "implementation, but wrap one.\n" + ); + +static PyObject * +bufferediobase_readinto(PyObject *self, PyObject *args) +{ + Py_buffer buf; + Py_ssize_t len; + PyObject *data; + + if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + return NULL; + } + + data = PyObject_CallMethod(self, "read", "n", buf.len); + if (data == NULL) + goto error; + + if (!PyBytes_Check(data)) { + Py_DECREF(data); + PyErr_SetString(PyExc_TypeError, "read() should return bytes"); + goto error; + } + + len = Py_SIZE(data); + memcpy(buf.buf, PyBytes_AS_STRING(data), len); + + PyBuffer_Release(&buf); + Py_DECREF(data); + + return PyLong_FromSsize_t(len); + + error: + PyBuffer_Release(&buf); + return NULL; +} + +static PyObject * +bufferediobase_unsupported(const char *message) +{ + PyErr_SetString(_PyIO_unsupported_operation, message); + return NULL; +} + +PyDoc_STRVAR(bufferediobase_detach_doc, + "Disconnect this buffer from its underlying raw stream and return it.\n" + "\n" + "After the raw stream has been detached, the buffer is in an unusable\n" + "state.\n"); + +static PyObject * +bufferediobase_detach(PyObject *self) +{ + return bufferediobase_unsupported("detach"); +} + +PyDoc_STRVAR(bufferediobase_read_doc, + "Read and return up to n bytes.\n" + "\n" + "If the argument is omitted, None, or negative, reads and\n" + "returns all data until EOF.\n" + "\n" + "If the argument is positive, and the underlying raw stream 
is\n" + "not 'interactive', multiple raw reads may be issued to satisfy\n" + "the byte count (unless EOF is reached first). But for\n" + "interactive raw streams (as well as sockets and pipes), at most\n" + "one raw read will be issued, and a short result does not imply\n" + "that EOF is imminent.\n" + "\n" + "Returns an empty bytes object on EOF.\n" + "\n" + "Returns None if the underlying raw stream was open in non-blocking\n" + "mode and no data is available at the moment.\n"); + +static PyObject * +bufferediobase_read(PyObject *self, PyObject *args) +{ + return bufferediobase_unsupported("read"); +} + +PyDoc_STRVAR(bufferediobase_read1_doc, + "Read and return up to n bytes, with at most one read() call\n" + "to the underlying raw stream. A short result does not imply\n" + "that EOF is imminent.\n" + "\n" + "Returns an empty bytes object on EOF.\n"); + +static PyObject * +bufferediobase_read1(PyObject *self, PyObject *args) +{ + return bufferediobase_unsupported("read1"); +} + +PyDoc_STRVAR(bufferediobase_write_doc, + "Write the given buffer to the IO stream.\n" + "\n" + "Returns the number of bytes written, which is never less than\n" + "len(b).\n" + "\n" + "Raises BlockingIOError if the buffer is full and the\n" + "underlying raw stream cannot accept more data at the moment.\n"); + +static PyObject * +bufferediobase_write(PyObject *self, PyObject *args) +{ + return bufferediobase_unsupported("write"); +} + + +static PyMethodDef bufferediobase_methods[] = { + {"detach", (PyCFunction)bufferediobase_detach, METH_NOARGS, bufferediobase_detach_doc}, + {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc}, + {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc}, + {"readinto", bufferediobase_readinto, METH_VARARGS, NULL}, + {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc}, + {NULL, NULL} +}; + +PyTypeObject PyBufferedIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._BufferedIOBase", /*tp_name*/ + 0, 
/*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + bufferediobase_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + bufferediobase_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + &PyIOBase_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + + /* Instance layout operated on by the buffered_* functions below; both bufferedreader_init() and bufferedwriter_init() initialize it. */ +typedef struct { + PyObject_HEAD + + PyObject *raw; /* The wrapped raw stream; set to NULL by buffered_detach() */ + int ok; /* Initialized? */ + int detached; /* Set to 1 by buffered_detach(), reset to 0 by the init functions */ + int readable; /* Set by the type-specific init function */ + int writable; /* Set by the type-specific init function */ + + /* True if this is a vanilla Buffered object (rather than a user derived + class) *and* the raw stream is a vanilla FileIO object. */ + int fast_closed_checks; + + /* Absolute position inside the raw stream (-1 if unknown). */ + Py_off_t abs_pos; + + /* Buffer of `buffer_size` bytes, allocated with PyMem_Malloc() in _buffered_init() */ + char *buffer; + /* Current logical position in the buffer. */ + Py_off_t pos; + /* Position of the raw stream in the buffer. */ + Py_off_t raw_pos; + + /* Just after the last buffered byte in the buffer, or -1 if the buffer + isn't ready for reading. */ + Py_off_t read_end; + + /* Just after the last byte actually written */ + Py_off_t write_pos; + /* Just after the last byte waiting to be written, or -1 if the buffer + isn't ready for writing. 
*/ + Py_off_t write_end; + +#ifdef WITH_THREAD + PyThread_type_lock lock; /* Acquired by ENTER_BUFFERED, released by LEAVE_BUFFERED */ +#endif + + Py_ssize_t buffer_size; + Py_ssize_t buffer_mask; /* buffer_size - 1 when buffer_size is a power of 2, otherwise 0 (computed in _buffered_init) */ + + PyObject *dict; /* Instance dict, located through tp_dictoffset */ + PyObject *weakreflist; /* Weak reference list, located through tp_weaklistoffset */ +} buffered; + +/* + Implementation notes: + + * BufferedReader, BufferedWriter and BufferedRandom try to share most + methods (this is helped by the members `readable` and `writable`, which + are initialized in the respective constructors) + * They also share a single buffer for reading and writing. This enables + interleaved reads and writes without flushing. It also makes the logic + a bit trickier to get right. + * The absolute position of the raw stream is cached, if possible, in the + `abs_pos` member. It must be updated every time an operation is done + on the raw stream. If not sure, it can be reinitialized by calling + _buffered_raw_tell(), which queries the raw stream (_buffered_raw_seek() + also does it). To read it, use RAW_TELL(). + * Three helpers, _bufferedreader_raw_read, _bufferedwriter_raw_write and + _bufferedwriter_flush_unlocked do a lot of useful housekeeping. + + NOTE: we should try to maintain block alignment of reads and writes to the + raw stream (according to the buffer size), but for now it is only done + in read() and friends. + +*/ + +/* These macros protect the buffered object against concurrent operations. 
*/ + /* ENTER_BUFFERED waits for self->lock with the GIL released; without WITH_THREAD both macros expand to nothing. */ +#ifdef WITH_THREAD +#define ENTER_BUFFERED(self) \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock(self->lock, 1); \ + Py_END_ALLOW_THREADS + +#define LEAVE_BUFFERED(self) \ + PyThread_release_lock(self->lock); +#else +#define ENTER_BUFFERED(self) +#define LEAVE_BUFFERED(self) +#endif + /* Makes the enclosing function return NULL with ValueError when self->ok <= 0; the message tells a detached raw stream apart from an uninitialized object. */ +#define CHECK_INITIALIZED(self) \ + if (self->ok <= 0) { \ + if (self->detached) { \ + PyErr_SetString(PyExc_ValueError, \ + "raw stream has been detached"); \ + } else { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + } \ + return NULL; \ + } + /* Same check for functions returning int: the enclosing function returns -1. */ +#define CHECK_INITIALIZED_INT(self) \ + if (self->ok <= 0) { \ + if (self->detached) { \ + PyErr_SetString(PyExc_ValueError, \ + "raw stream has been detached"); \ + } else { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + } \ + return -1; \ + } + /* NOTE(review): buffered_closed() returns -1 on error, which is non-zero and therefore treated as "closed" by CHECK_CLOSED below. */ +#define IS_CLOSED(self) \ + (self->fast_closed_checks \ + ? _PyFileIO_closed(self->raw) \ + : buffered_closed(self)) + /* Makes the enclosing function return NULL with ValueError(error_msg) when IS_CLOSED() is non-zero. */ +#define CHECK_CLOSED(self, error_msg) \ + if (IS_CLOSED(self)) { \ + PyErr_SetString(PyExc_ValueError, error_msg); \ + return NULL; \ + } + + /* The read (resp. write) buffer is valid when the object is readable (resp. writable) and read_end (resp. write_end) is not -1. */ +#define VALID_READ_BUFFER(self) \ + (self->readable && self->read_end != -1) + +#define VALID_WRITE_BUFFER(self) \ + (self->writable && self->write_end != -1) + /* Sets pos, and raises read_end up to the new pos when a valid read buffer ends before it. */ +#define ADJUST_POSITION(self, _new_pos) \ + do { \ + self->pos = _new_pos; \ + if (VALID_READ_BUFFER(self) && self->read_end < self->pos) \ + self->read_end = self->pos; \ + } while(0) + /* Number of buffered bytes between pos and read_end, or 0 without a valid read buffer. */ +#define READAHEAD(self) \ + ((self->readable && VALID_READ_BUFFER(self)) \ + ? (self->read_end - self->pos) : 0) + /* Distance raw_pos - pos, or 0 when no buffer is valid or raw_pos is negative. */ +#define RAW_OFFSET(self) \ + (((VALID_READ_BUFFER(self) || VALID_WRITE_BUFFER(self)) \ + && self->raw_pos >= 0) ? self->raw_pos - self->pos : 0) + /* Cached abs_pos when known, otherwise the result of _buffered_raw_tell(), which may be -1 on error. */ +#define RAW_TELL(self) \ + (self->abs_pos != -1 ? self->abs_pos : _buffered_raw_tell(self)) + /* Rounds size down to a multiple of buffer_size, using buffer_mask when it is non-zero. */ +#define MINUS_LAST_BLOCK(self, size) \ + (self->buffer_mask ? 
\ + (size & ~self->buffer_mask) : \ + (self->buffer_size * (size / self->buffer_size))) + + /* tp_dealloc: runs _PyIOBase_finalize() while the object is still initialized and returns early when it reports < 0; otherwise untracks the object from the GC, clears weak references and releases raw, the buffer, the lock and the instance dict before calling tp_free. */ +static void +buffered_dealloc(buffered *self) +{ + if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) + return; + _PyObject_GC_UNTRACK(self); + self->ok = 0; + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *)self); + Py_CLEAR(self->raw); + if (self->buffer) { + PyMem_Free(self->buffer); + self->buffer = NULL; + } +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + self->lock = NULL; + } +#endif + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *)self); +} + /* tp_traverse: visits the two owned object references, raw and dict. */ +static int +buffered_traverse(buffered *self, visitproc visit, void *arg) +{ + Py_VISIT(self->raw); + Py_VISIT(self->dict); + return 0; +} + /* tp_clear: finalizes the object if it is still initialized (returning -1 when that fails), marks it uninitialized and drops raw and dict. */ +static int +buffered_clear(buffered *self) +{ + if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) + return -1; + self->ok = 0; + Py_CLEAR(self->raw); + Py_CLEAR(self->dict); + return 0; +} + +/* + * _BufferedIOMixin methods + * This is not a class, just a collection of methods that will be reused + * by BufferedReader and BufferedWriter + */ + +/* Flush and close */ + /* flush() variant that only forwards to raw.flush(). */ +static PyObject * +buffered_simple_flush(buffered *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_flush, NULL); +} + /* Truth value of raw.closed as an int; -1 with an exception set when the object is uninitialized or the attribute lookup fails. */ +static int +buffered_closed(buffered *self) +{ + int closed; + PyObject *res; + CHECK_INITIALIZED_INT(self) + res = PyObject_GetAttr(self->raw, _PyIO_str_closed); + if (res == NULL) + return -1; + closed = PyObject_IsTrue(res); + Py_DECREF(res); + return closed; +} + /* Getter for the `closed` attribute: returns raw.closed unchanged. */ +static PyObject * +buffered_closed_get(buffered *self, void *context) +{ + CHECK_INITIALIZED(self) + return PyObject_GetAttr(self->raw, _PyIO_str_closed); +} + /* close(): returns None when the raw stream is already closed; otherwise calls self.flush() and then raw.close(). */ +static PyObject * +buffered_close(buffered *self, PyObject *args) +{ + PyObject *res = NULL; + int r; + + CHECK_INITIALIZED(self) + ENTER_BUFFERED(self) + + r = buffered_closed(self); + if (r < 0) + goto end; + if (r > 0) { + res = Py_None; + 
Py_INCREF(res); + goto end; + } + /* flush() will most probably re-take the lock, so drop it first */ + LEAVE_BUFFERED(self) + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + ENTER_BUFFERED(self) + if (res == NULL) { + /* If flush() fails, just give up */ + if (PyErr_ExceptionMatches(PyExc_IOError)) + PyErr_Clear(); + else + goto end; + } + Py_XDECREF(res); + /* Reached after a successful flush, or after a flush that failed with IOError (which was cleared above). */ + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_close, NULL); + +end: + LEAVE_BUFFERED(self) + return res; +} + +/* detach */ + /* Calls self.flush(), then returns the raw stream; the reference held in self->raw is handed to the caller and the object is marked detached and uninitialized. */ +static PyObject * +buffered_detach(buffered *self, PyObject *args) +{ + PyObject *raw, *res; + CHECK_INITIALIZED(self) + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); + raw = self->raw; + self->raw = NULL; + self->detached = 1; + self->ok = 0; + return raw; +} + +/* Inquiries */ + /* Each wrapper below checks that the object is initialized and forwards the call or attribute lookup to the raw stream. */ +static PyObject * +buffered_seekable(buffered *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_seekable, NULL); +} + +static PyObject * +buffered_readable(buffered *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readable, NULL); +} + +static PyObject * +buffered_writable(buffered *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_writable, NULL); +} + /* Getter: raw.name */ +static PyObject * +buffered_name_get(buffered *self, void *context) +{ + CHECK_INITIALIZED(self) + return PyObject_GetAttrString(self->raw, "name"); +} + /* Getter: raw.mode */ +static PyObject * +buffered_mode_get(buffered *self, void *context) +{ + CHECK_INITIALIZED(self) + return PyObject_GetAttrString(self->raw, "mode"); +} + +/* Lower-level APIs */ + +static PyObject * +buffered_fileno(buffered *self, PyObject *args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_fileno, NULL); +} + +static PyObject * +buffered_isatty(buffered *self, PyObject 
*args) +{ + CHECK_INITIALIZED(self) + return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_isatty, NULL); +} + + +/* Forward decls */ +static PyObject * +_bufferedwriter_flush_unlocked(buffered *, int); +static Py_ssize_t +_bufferedreader_fill_buffer(buffered *self); +static void +_bufferedreader_reset_buf(buffered *self); +static void +_bufferedwriter_reset_buf(buffered *self); +static PyObject * +_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t); +static PyObject * +_bufferedreader_read_all(buffered *self); +static PyObject * +_bufferedreader_read_fast(buffered *self, Py_ssize_t); +static PyObject * +_bufferedreader_read_generic(buffered *self, Py_ssize_t); + + +/* + * Helpers + */ + +/* Returns the address of the `written` member if a BlockingIOError was + raised, NULL otherwise. The error is always re-raised. */ +static Py_ssize_t * +_buffered_check_blocking_error(void) +{ + PyObject *t, *v, *tb; + PyBlockingIOErrorObject *err; + + PyErr_Fetch(&t, &v, &tb); + if (v == NULL || !PyErr_GivenExceptionMatches(v, PyExc_BlockingIOError)) { + PyErr_Restore(t, v, tb); + return NULL; + } + err = (PyBlockingIOErrorObject *) v; + /* TODO: sanity check (err->written >= 0) */ + PyErr_Restore(t, v, tb); + return &err->written; +} + +static Py_off_t +_buffered_raw_tell(buffered *self) +{ + Py_off_t n; + PyObject *res; + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_tell, NULL); + if (res == NULL) + return -1; + n = PyNumber_AsOff_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0) { + if (!PyErr_Occurred()) + PyErr_Format(PyExc_IOError, + "Raw stream returned invalid position %zd", n); + return -1; + } + self->abs_pos = n; + return n; +} + +static Py_off_t +_buffered_raw_seek(buffered *self, Py_off_t target, int whence) +{ + PyObject *res, *posobj, *whenceobj; + Py_off_t n; + + posobj = PyLong_FromOff_t(target); + if (posobj == NULL) + return -1; + whenceobj = PyLong_FromLong(whence); + if (whenceobj == NULL) { + Py_DECREF(posobj); + return -1; + } + res 
= PyObject_CallMethodObjArgs(self->raw, _PyIO_str_seek, + posobj, whenceobj, NULL); + Py_DECREF(posobj); + Py_DECREF(whenceobj); + if (res == NULL) + return -1; + n = PyNumber_AsOff_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0) { + if (!PyErr_Occurred()) + PyErr_Format(PyExc_IOError, + "Raw stream returned invalid position %zd", n); + return -1; + } + self->abs_pos = n; + return n; +} + +static int +_buffered_init(buffered *self) +{ + Py_ssize_t n; + if (self->buffer_size <= 0) { + PyErr_SetString(PyExc_ValueError, + "buffer size must be strictly positive"); + return -1; + } + if (self->buffer) + PyMem_Free(self->buffer); + self->buffer = PyMem_Malloc(self->buffer_size); + if (self->buffer == NULL) { + PyErr_NoMemory(); + return -1; + } +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_RuntimeError, "can't allocate read lock"); + return -1; + } +#endif + /* Find out whether buffer_size is a power of 2 */ + /* XXX is this optimization useful? */ + for (n = self->buffer_size - 1; n & 1; n >>= 1) + ; + if (n == 0) + self->buffer_mask = self->buffer_size - 1; + else + self->buffer_mask = 0; + if (_buffered_raw_tell(self) == -1) + PyErr_Clear(); + return 0; +} + +/* + * Shared methods and wrappers + */ + +static PyObject * +buffered_flush(buffered *self, PyObject *args) +{ + PyObject *res; + + CHECK_INITIALIZED(self) + CHECK_CLOSED(self, "flush of closed file") + + ENTER_BUFFERED(self) + res = _bufferedwriter_flush_unlocked(self, 0); + if (res != NULL && self->readable) { + /* Rewind the raw stream so that its position corresponds to + the current logical position. 
*/ + Py_off_t n; + n = _buffered_raw_seek(self, -RAW_OFFSET(self), 1); + if (n == -1) + Py_CLEAR(res); + _bufferedreader_reset_buf(self); + } + LEAVE_BUFFERED(self) + + return res; +} + /* peek([n]): flushes pending writes when the object is writable, then delegates to _bufferedreader_peek_unlocked() under the lock. */ +static PyObject * +buffered_peek(buffered *self, PyObject *args) +{ + Py_ssize_t n = 0; + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|n:peek", &n)) { + return NULL; + } + + ENTER_BUFFERED(self) + + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self, 1); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + res = _bufferedreader_peek_unlocked(self, n); + +end: + LEAVE_BUFFERED(self) + return res; +} + /* read([n]): n == -1 (the default) reads until EOF; otherwise the unlocked fast path is tried first and the locked generic path is used only when it returns None. Values below -1 raise ValueError. */ +static PyObject * +buffered_read(buffered *self, PyObject *args) +{ + Py_ssize_t n = -1; + PyObject *res; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|n:read", &n)) { + return NULL; + } + if (n < -1) { + PyErr_SetString(PyExc_ValueError, + "read length must be positive or -1"); + return NULL; + } + + CHECK_CLOSED(self, "read of closed file") + + if (n == -1) { + /* The number of bytes is unspecified, read until the end of stream */ + ENTER_BUFFERED(self) + res = _bufferedreader_read_all(self); + LEAVE_BUFFERED(self) + } + else { + res = _bufferedreader_read_fast(self, n); + if (res == Py_None) { + Py_DECREF(res); + ENTER_BUFFERED(self) + res = _bufferedreader_read_generic(self, n); + LEAVE_BUFFERED(self) + } + } + + return res; +} + /* read1(n): n is required; a negative n raises ValueError and n == 0 returns an empty bytes object without taking the lock. */ +static PyObject * +buffered_read1(buffered *self, PyObject *args) +{ + Py_ssize_t n, have, r; + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "n:read1", &n)) { + return NULL; + } + + if (n < 0) { + PyErr_SetString(PyExc_ValueError, + "read length must be positive"); + return NULL; + } + if (n == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + ENTER_BUFFERED(self) + + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self, 1); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + + /* Return up to n bytes. 
If at least one byte is buffered, we + only return buffered bytes. Otherwise, we do one raw read. */ + + /* XXX: this mimics the io.py implementation but is probably wrong. + If we need to read from the raw stream, then we could actually read + all `n` bytes asked by the caller (and possibly more, so as to fill + our buffer for the next reads). */ + + have = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (have > 0) { + if (n > have) + n = have; + res = PyBytes_FromStringAndSize(self->buffer + self->pos, n); + if (res == NULL) + goto end; + self->pos += n; + goto end; + } + + /* Fill the buffer from the raw stream, and copy it to the result. */ + _bufferedreader_reset_buf(self); + r = _bufferedreader_fill_buffer(self); + if (r == -1) + goto end; + if (r == -2) /* the raw read would have blocked: behave as if zero bytes were read */ + r = 0; + if (n > r) + n = r; + res = PyBytes_FromStringAndSize(self->buffer, n); + if (res == NULL) + goto end; + self->pos = n; + +end: + LEAVE_BUFFERED(self) + return res; +} + /* readinto(b): flushes pending writes when the object is writable, then falls back to the generic bufferediobase_readinto(), which goes through self.read(). */ +static PyObject * +buffered_readinto(buffered *self, PyObject *args) +{ + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + + /* TODO: use raw.readinto() instead! */ + if (self->writable) { + ENTER_BUFFERED(self) + res = _bufferedwriter_flush_unlocked(self, 0); + LEAVE_BUFFERED(self) + if (res == NULL) + goto end; + Py_DECREF(res); + } + res = bufferediobase_readinto((PyObject *)self, args); + +end: + return res; +} + /* Reads one line of at most `limit` bytes (no limit when negative), first from the buffer and then from the raw stream. Does not check initialization; callers do that. */ +static PyObject * +_buffered_readline(buffered *self, Py_ssize_t limit) +{ + PyObject *res = NULL; + PyObject *chunks = NULL; + Py_ssize_t n, written = 0; + const char *start, *s, *end; + + CHECK_CLOSED(self, "readline of closed file") + + /* First, try to find a line in the buffer. This can run unlocked because + the calls to the C API are simple enough that they can't trigger + any thread switch. 
*/ + n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (limit >= 0 && n > limit) + n = limit; + start = self->buffer + self->pos; + s = memchr(start, '\n', n); + if (s != NULL) { + res = PyBytes_FromStringAndSize(start, s - start + 1); + if (res != NULL) + self->pos += s - start + 1; + goto end_unlocked; + } + if (n == limit) { /* no newline, but the buffered data already reaches the limit */ + res = PyBytes_FromStringAndSize(start, n); + if (res != NULL) + self->pos += n; + goto end_unlocked; + } + + ENTER_BUFFERED(self) + + /* Now we try to get some more from the raw stream */ + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self, 1); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + chunks = PyList_New(0); + if (chunks == NULL) + goto end; + if (n > 0) { /* keep the buffered bytes scanned above as the first chunk */ + res = PyBytes_FromStringAndSize(start, n); + if (res == NULL) + goto end; + if (PyList_Append(chunks, res) < 0) { + Py_CLEAR(res); + goto end; + } + Py_CLEAR(res); + written += n; + if (limit >= 0) + limit -= n; + } + + for (;;) { /* refill the buffer until a newline, the limit, EOF or a would-block result */ + _bufferedreader_reset_buf(self); + n = _bufferedreader_fill_buffer(self); + if (n == -1) + goto end; + if (n <= 0) /* 0 or -2: nothing was read */ + break; + if (limit >= 0 && n > limit) + n = limit; + start = self->buffer; + end = start + n; + s = start; + while (s < end) { + if (*s++ == '\n') { + res = PyBytes_FromStringAndSize(start, s - start); + if (res == NULL) + goto end; + self->pos = s - start; + goto found; + } + } + res = PyBytes_FromStringAndSize(start, n); + if (res == NULL) + goto end; + if (n == limit) { + self->pos = n; + break; + } + if (PyList_Append(chunks, res) < 0) { + Py_CLEAR(res); + goto end; + } + Py_CLEAR(res); + written += n; + if (limit >= 0) + limit -= n; + } +found: /* also reached by the break statements above; res is the last chunk or NULL */ + if (res != NULL && PyList_Append(chunks, res) < 0) { + Py_CLEAR(res); + goto end; + } + Py_CLEAR(res); + res = _PyBytes_Join(_PyIO_empty_bytes, chunks); + +end: + LEAVE_BUFFERED(self) +end_unlocked: + Py_XDECREF(chunks); + return res; +} + /* readline([limit]): converts the optional limit argument and delegates to _buffered_readline(); without an argument the limit is -1. */ +static PyObject * +buffered_readline(buffered *self, PyObject *args) +{ + PyObject *limitobj = NULL; + 
Py_ssize_t limit = -1; + + CHECK_INITIALIZED(self) + + if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) { + return NULL; + } + if (limitobj) { + if (!PyNumber_Check(limitobj)) { + PyErr_Format(PyExc_TypeError, + "integer argument expected, got '%.200s'", + Py_TYPE(limitobj)->tp_name); + return NULL; + } + limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError); + if (limit == -1 && PyErr_Occurred()) + return NULL; + } + return _buffered_readline(self, limit); +} + + /* tell(): queries the raw stream position and subtracts RAW_OFFSET() from it to obtain the logical position. */ +static PyObject * +buffered_tell(buffered *self, PyObject *args) +{ + Py_off_t pos; + + CHECK_INITIALIZED(self) + pos = _buffered_raw_tell(self); + if (pos == -1) + return NULL; + pos -= RAW_OFFSET(self); + /* TODO: sanity check (pos >= 0) */ + return PyLong_FromOff_t(pos); +} + /* seek(target[, whence]): whence defaults to 0 and must be 0, 1 or 2 (ValueError otherwise). */ +static PyObject * +buffered_seek(buffered *self, PyObject *args) +{ + Py_off_t target, n; + int whence = 0; + PyObject *targetobj, *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "O|i:seek", &targetobj, &whence)) { + return NULL; + } + if (whence < 0 || whence > 2) { + PyErr_Format(PyExc_ValueError, + "whence must be between 0 and 2, not %d", whence); + return NULL; + } + + CHECK_CLOSED(self, "seek of closed file") + + target = PyNumber_AsOff_t(targetobj, PyExc_ValueError); + if (target == -1 && PyErr_Occurred()) + return NULL; + + if (whence != 2 && self->readable) { + Py_off_t current, avail; + /* Check if seeking leaves us inside the current buffer, + so as to return quickly if possible. Also, we needn't take the + lock in this fast path. + Don't know how to do that when whence == 2, though. */ + /* NOTE: RAW_TELL() can release the GIL but the object is in a stable + state at this point. 
*/ + current = RAW_TELL(self); /* NOTE(review): a -1 (error) result from RAW_TELL() is not checked here */ + avail = READAHEAD(self); + if (avail > 0) { + Py_off_t offset; /* displacement relative to the current logical position */ + if (whence == 0) + offset = target - (current - RAW_OFFSET(self)); + else + offset = target; + if (offset >= -self->pos && offset <= avail) { + self->pos += offset; + return PyLong_FromOff_t(current - avail + offset); + } + } + } + + ENTER_BUFFERED(self) + + /* Fallback: invoke raw seek() method and clear buffer */ + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self, 0); + if (res == NULL) + goto end; + Py_CLEAR(res); + _bufferedwriter_reset_buf(self); + } + + /* TODO: align on block boundary and read buffer if needed? */ + if (whence == 1) /* a relative seek is expressed from the logical position, so compensate for the raw stream's offset */ + target -= RAW_OFFSET(self); + n = _buffered_raw_seek(self, target, whence); + if (n == -1) + goto end; + self->raw_pos = -1; + res = PyLong_FromOff_t(n); + if (res != NULL && self->readable) + _bufferedreader_reset_buf(self); + +end: + LEAVE_BUFFERED(self) + return res; +} + /* truncate([pos]): flushes pending writes, realigns the raw stream with the logical position when pos is None, discards the read buffer and forwards to raw.truncate(pos). */ +static PyObject * +buffered_truncate(buffered *self, PyObject *args) +{ + PyObject *pos = Py_None; + PyObject *res = NULL; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) { + return NULL; + } + + ENTER_BUFFERED(self) + + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self, 0); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + if (self->readable) { + if (pos == Py_None) { + /* Rewind the raw stream so that its position corresponds to + the current logical position. 
*/ + if (_buffered_raw_seek(self, -RAW_OFFSET(self), 1) == -1) + goto end; + } + _bufferedreader_reset_buf(self); + } + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_truncate, pos, NULL); + if (res == NULL) + goto end; + /* Reset cached position */ + if (_buffered_raw_tell(self) == -1) + PyErr_Clear(); + +end: + LEAVE_BUFFERED(self) + return res; +} + /* tp_iternext: returns the next line, or NULL once an empty bytes object is read. The exact built-in reader/random types call _buffered_readline() directly; other types go through their readline() method, whose result must be bytes. */ +static PyObject * +buffered_iternext(buffered *self) +{ + PyObject *line; + PyTypeObject *tp; + + CHECK_INITIALIZED(self); + + tp = Py_TYPE(self); + if (tp == &PyBufferedReader_Type || + tp == &PyBufferedRandom_Type) { + /* Skip method call overhead for speed */ + line = _buffered_readline(self, -1); + } + else { + line = PyObject_CallMethodObjArgs((PyObject *)self, + _PyIO_str_readline, NULL); + if (line && !PyBytes_Check(line)) { + PyErr_Format(PyExc_IOError, + "readline() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyBytes_GET_SIZE(line) == 0) { + /* Reached EOF or would have blocked */ + Py_DECREF(line); + return NULL; + } + + return line; +} + /* tp_repr: "<type name=repr(name)>" when the object has a `name` attribute, "<type>" when looking it up raises AttributeError; any other lookup error is propagated. */ +static PyObject * +buffered_repr(buffered *self) +{ + PyObject *nameobj, *res; + + nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + if (nameobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + return NULL; + res = PyString_FromFormat("<%s>", Py_TYPE(self)->tp_name); + } + else { + PyObject *repr = PyObject_Repr(nameobj); + Py_DECREF(nameobj); + if (repr == NULL) + return NULL; + res = PyString_FromFormat("<%s name=%s>", + Py_TYPE(self)->tp_name, + PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + return res; +} + +/* + * class BufferedReader + */ + +PyDoc_STRVAR(bufferedreader_doc, + "Create a new buffered reader using the given readable raw IO object."); + /* Marks the read buffer as invalid by setting read_end to -1. */ +static void _bufferedreader_reset_buf(buffered *self) +{ + self->read_end = -1; +} + +static int +bufferedreader_init(buffered 
*self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"raw", "buffer_size", NULL}; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + PyObject *raw; + + self->ok = 0; /* stays 0 on every early return below */ + self->detached = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|n:BufferedReader", kwlist, + &raw, &buffer_size)) { + return -1; + } + + if (_PyIOBase_check_readable(raw, Py_True) == NULL) + return -1; + + Py_CLEAR(self->raw); /* drops the raw stream of a previous initialization, if any */ + Py_INCREF(raw); + self->raw = raw; + self->buffer_size = buffer_size; + self->readable = 1; + self->writable = 0; + + if (_buffered_init(self) < 0) + return -1; + _bufferedreader_reset_buf(self); + /* The fast closed check is only enabled for the exact built-in reader type wrapping an exact FileIO object. */ + self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedReader_Type && + Py_TYPE(raw) == &PyFileIO_Type); + + self->ok = 1; + return 0; +} + /* Calls raw.readinto() on a memoryview wrapping `len` bytes at `start`. */ +static Py_ssize_t +_bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len) +{ + Py_buffer buf; + PyObject *memobj, *res; + Py_ssize_t n; + /* NOTE: the buffer needn't be released as its object is NULL. */ + if (PyBuffer_FillInfo(&buf, NULL, start, len, 0, PyBUF_CONTIG) == -1) + return -1; + memobj = PyMemoryView_FromBuffer(&buf); + if (memobj == NULL) + return -1; + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL); + Py_DECREF(memobj); + if (res == NULL) + return -1; + if (res == Py_None) { + /* Non-blocking stream would have blocked. Special return code! 
*/ + Py_DECREF(res); + return -2; + } + n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0 || n > len) { + PyErr_Format(PyExc_IOError, + "raw readinto() returned invalid length %zd " + "(should have been between 0 and %zd)", n, len); + return -1; + } + if (n > 0 && self->abs_pos != -1) + self->abs_pos += n; + return n; +} + +static Py_ssize_t +_bufferedreader_fill_buffer(buffered *self) +{ + Py_ssize_t start, len, n; + if (VALID_READ_BUFFER(self)) + start = Py_SAFE_DOWNCAST(self->read_end, Py_off_t, Py_ssize_t); + else + start = 0; + len = self->buffer_size - start; + n = _bufferedreader_raw_read(self, self->buffer + start, len); + if (n <= 0) + return n; + self->read_end = start + n; + self->raw_pos = start + n; + return n; +} + +static PyObject * +_bufferedreader_read_all(buffered *self) +{ + Py_ssize_t current_size; + PyObject *res, *data = NULL; + PyObject *chunks = PyList_New(0); + + if (chunks == NULL) + return NULL; + + /* First copy what we have in the current buffer. */ + current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (current_size) { + data = PyBytes_FromStringAndSize( + self->buffer + self->pos, current_size); + if (data == NULL) { + Py_DECREF(chunks); + return NULL; + } + } + _bufferedreader_reset_buf(self); + /* We're going past the buffer's bounds, flush it */ + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self, 1); + if (res == NULL) { + Py_DECREF(chunks); + return NULL; + } + Py_CLEAR(res); + } + while (1) { + if (data) { + if (PyList_Append(chunks, data) < 0) { + Py_DECREF(data); + Py_DECREF(chunks); + return NULL; + } + Py_DECREF(data); + } + + /* Read until EOF or until read() would block. 
*/ + data = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_read, NULL); + if (data == NULL) { + Py_DECREF(chunks); + return NULL; + } + if (data != Py_None && !PyBytes_Check(data)) { + Py_DECREF(data); + Py_DECREF(chunks); + PyErr_SetString(PyExc_TypeError, "read() should return bytes"); + return NULL; + } + if (data == Py_None || PyBytes_GET_SIZE(data) == 0) { + if (current_size == 0) { + Py_DECREF(chunks); + return data; + } + else { + res = _PyBytes_Join(_PyIO_empty_bytes, chunks); + Py_DECREF(data); + Py_DECREF(chunks); + return res; + } + } + current_size += PyBytes_GET_SIZE(data); + if (self->abs_pos != -1) + self->abs_pos += PyBytes_GET_SIZE(data); + } +} + +/* Read n bytes from the buffer if it can, otherwise return None. + This function is simple enough that it can run unlocked. */ +static PyObject * +_bufferedreader_read_fast(buffered *self, Py_ssize_t n) +{ + Py_ssize_t current_size; + + current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n <= current_size) { + /* Fast path: the data to read is fully buffered. */ + PyObject *res = PyBytes_FromStringAndSize(self->buffer + self->pos, n); + if (res != NULL) + self->pos += n; + return res; + } + Py_RETURN_NONE; +} + +/* Generic read function: read from the stream until enough bytes are read, + * or until an EOF occurs or until read() would block. 
+ */ +static PyObject * +_bufferedreader_read_generic(buffered *self, Py_ssize_t n) +{ + PyObject *res = NULL; + Py_ssize_t current_size, remaining, written; + char *out; + + current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n <= current_size) + return _bufferedreader_read_fast(self, n); + + res = PyBytes_FromStringAndSize(NULL, n); + if (res == NULL) + goto error; + out = PyBytes_AS_STRING(res); + remaining = n; + written = 0; + if (current_size > 0) { /* start with whatever is still buffered */ + memcpy(out, self->buffer + self->pos, current_size); + remaining -= current_size; + written += current_size; + } + _bufferedreader_reset_buf(self); + while (remaining > 0) { /* first loop: whole blocks go from the raw stream straight into the result */ + /* We want to read a whole block at the end into buffer. + If we had readv() we could do this in one pass. */ + Py_ssize_t r = MINUS_LAST_BLOCK(self, remaining); + if (r == 0) + break; + r = _bufferedreader_raw_read(self, out + written, r); + if (r == -1) + goto error; + if (r == 0 || r == -2) { + /* EOF occurred or read() would block. */ + if (r == 0 || written > 0) { + if (_PyBytes_Resize(&res, written)) + goto error; + return res; + } + Py_DECREF(res); + Py_INCREF(Py_None); + return Py_None; + } + remaining -= r; + written += r; + } + assert(remaining <= self->buffer_size); + self->pos = 0; + self->raw_pos = 0; + self->read_end = 0; + while (self->read_end < self->buffer_size) { /* second loop: refill the internal buffer and copy the remaining bytes out of it */ + Py_ssize_t r = _bufferedreader_fill_buffer(self); + if (r == -1) + goto error; + if (r == 0 || r == -2) { + /* EOF occurred or read() would block. 
*/ + if (r == 0 || written > 0) { /* return what was gathered so far, shrunk to `written` bytes */ + if (_PyBytes_Resize(&res, written)) + goto error; + return res; + } + Py_DECREF(res); /* would block before anything was read: the result is None */ + Py_INCREF(Py_None); + return Py_None; + } + if (remaining > r) { + memcpy(out + written, self->buffer + self->pos, r); + written += r; + self->pos += r; + remaining -= r; + } + else if (remaining > 0) { + memcpy(out + written, self->buffer + self->pos, remaining); + written += remaining; + self->pos += remaining; + remaining = 0; + } + if (remaining == 0) + break; + } + + return res; + +error: + Py_XDECREF(res); + return NULL; +} + /* Returns buffered bytes without advancing pos. The `n` argument is not used by this implementation. */ +static PyObject * +_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t n) +{ + Py_ssize_t have, r; + + have = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + /* Constraints: + 1. we don't want to advance the file position. + 2. we don't want to lose block alignment, so we can't shift the buffer + to make some place. + Therefore, we either return `have` bytes (if > 0), or a full buffer. + */ + if (have > 0) { + return PyBytes_FromStringAndSize(self->buffer + self->pos, have); + } + + /* Fill the buffer from the raw stream, and copy it to the result. 
*/ + _bufferedreader_reset_buf(self); + r = _bufferedreader_fill_buffer(self); + if (r == -1) + return NULL; + if (r == -2) /* the raw read would have blocked: return an empty bytes object */ + r = 0; + self->pos = 0; + return PyBytes_FromStringAndSize(self->buffer, r); +} + /* Methods exposed by BufferedReader; none of the entries supplies a docstring. */ +static PyMethodDef bufferedreader_methods[] = { + /* BufferedIOMixin methods */ + {"detach", (PyCFunction)buffered_detach, METH_NOARGS}, + {"flush", (PyCFunction)buffered_simple_flush, METH_NOARGS}, + {"close", (PyCFunction)buffered_close, METH_NOARGS}, + {"seekable", (PyCFunction)buffered_seekable, METH_NOARGS}, + {"readable", (PyCFunction)buffered_readable, METH_NOARGS}, + {"writable", (PyCFunction)buffered_writable, METH_NOARGS}, + {"fileno", (PyCFunction)buffered_fileno, METH_NOARGS}, + {"isatty", (PyCFunction)buffered_isatty, METH_NOARGS}, + + {"read", (PyCFunction)buffered_read, METH_VARARGS}, + {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, + {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, + {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, + {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, + {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, + {"truncate", (PyCFunction)buffered_truncate, METH_VARARGS}, + {NULL, NULL} +}; + /* Exposes the raw stream object as the `raw` attribute. */ +static PyMemberDef bufferedreader_members[] = { + {"raw", T_OBJECT, offsetof(buffered, raw), 0}, + {NULL} +}; + /* Computed attributes; no setter is provided for any of them. */ +static PyGetSetDef bufferedreader_getset[] = { + {"closed", (getter)buffered_closed_get, NULL, NULL}, + {"name", (getter)buffered_name_get, NULL, NULL}, + {"mode", (getter)buffered_mode_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyBufferedReader_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedReader", /*tp_name*/ + sizeof(buffered), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)buffered_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)buffered_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, 
/*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + bufferedreader_doc, /* tp_doc */ + (traverseproc)buffered_traverse, /* tp_traverse */ + (inquiry)buffered_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(buffered, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + (iternextfunc)buffered_iternext, /* tp_iternext */ + bufferedreader_methods, /* tp_methods */ + bufferedreader_members, /* tp_members */ + bufferedreader_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(buffered, dict), /* tp_dictoffset */ + (initproc)bufferedreader_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + + +static int +complain_about_max_buffer_size(void) +{ + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "max_buffer_size is deprecated", 1) < 0) + return 0; + return 1; +} + +/* + * class BufferedWriter + */ +PyDoc_STRVAR(bufferedwriter_doc, + "A buffer for a writeable sequential RawIO object.\n" + "\n" + "The constructor creates a BufferedWriter for the given writeable raw\n" + "stream. If the buffer_size is not given, it defaults to\n" + "DEFAULT_BUFFER_SIZE. 
max_buffer_size isn't used anymore.\n" + ); + +static void +_bufferedwriter_reset_buf(buffered *self) +{ + self->write_pos = 0; + self->write_end = -1; +} + +static int +bufferedwriter_init(buffered *self, PyObject *args, PyObject *kwds) +{ + /* TODO: properly deprecate max_buffer_size */ + char *kwlist[] = {"raw", "buffer_size", "max_buffer_size", NULL}; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + Py_ssize_t max_buffer_size = -234; + PyObject *raw; + + self->ok = 0; + self->detached = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|nn:BufferedReader", kwlist, + &raw, &buffer_size, &max_buffer_size)) { + return -1; + } + + if (max_buffer_size != -234 && !complain_about_max_buffer_size()) + return -1; + + if (_PyIOBase_check_writable(raw, Py_True) == NULL) + return -1; + + Py_CLEAR(self->raw); + Py_INCREF(raw); + self->raw = raw; + self->readable = 0; + self->writable = 1; + + self->buffer_size = buffer_size; + if (_buffered_init(self) < 0) + return -1; + _bufferedwriter_reset_buf(self); + self->pos = 0; + + self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type && + Py_TYPE(raw) == &PyFileIO_Type); + + self->ok = 1; + return 0; +} + +static Py_ssize_t +_bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) +{ + Py_buffer buf; + PyObject *memobj, *res; + Py_ssize_t n; + /* NOTE: the buffer needn't be released as its object is NULL. 
*/ + if (PyBuffer_FillInfo(&buf, NULL, start, len, 1, PyBUF_CONTIG_RO) == -1) + return -1; + memobj = PyMemoryView_FromBuffer(&buf); + if (memobj == NULL) + return -1; + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL); + Py_DECREF(memobj); + if (res == NULL) + return -1; + n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n < 0 || n > len) { + PyErr_Format(PyExc_IOError, + "raw write() returned invalid length %zd " + "(should have been between 0 and %zd)", n, len); + return -1; + } + if (n > 0 && self->abs_pos != -1) + self->abs_pos += n; + return n; +} + +/* `restore_pos` is 1 if we need to restore the raw stream position at + the end, 0 otherwise. */ +static PyObject * +_bufferedwriter_flush_unlocked(buffered *self, int restore_pos) +{ + Py_ssize_t written = 0; + Py_off_t n, rewind; + + if (!VALID_WRITE_BUFFER(self) || self->write_pos == self->write_end) + goto end; + /* First, rewind */ + rewind = RAW_OFFSET(self) + (self->pos - self->write_pos); + if (rewind != 0) { + n = _buffered_raw_seek(self, -rewind, 1); + if (n < 0) { + goto error; + } + self->raw_pos -= rewind; + } + while (self->write_pos < self->write_end) { + n = _bufferedwriter_raw_write(self, + self->buffer + self->write_pos, + Py_SAFE_DOWNCAST(self->write_end - self->write_pos, + Py_off_t, Py_ssize_t)); + if (n == -1) { + Py_ssize_t *w = _buffered_check_blocking_error(); + if (w == NULL) + goto error; + self->write_pos += *w; + self->raw_pos = self->write_pos; + written += *w; + *w = written; + /* Already re-raised */ + goto error; + } + self->write_pos += n; + self->raw_pos = self->write_pos; + written += Py_SAFE_DOWNCAST(n, Py_off_t, Py_ssize_t); + } + + if (restore_pos) { + Py_off_t forward = rewind - written; + if (forward != 0) { + n = _buffered_raw_seek(self, forward, 1); + if (n < 0) { + goto error; + } + self->raw_pos += forward; + } + } + _bufferedwriter_reset_buf(self); + +end: + Py_RETURN_NONE; + +error: + return NULL; +} + +static PyObject 
* +bufferedwriter_write(buffered *self, PyObject *args) +{ + PyObject *res = NULL; + Py_buffer buf; + Py_ssize_t written, avail, remaining, n; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "s*:write", &buf)) { + return NULL; + } + + if (IS_CLOSED(self)) { + PyErr_SetString(PyExc_ValueError, "write to closed file"); + PyBuffer_Release(&buf); + return NULL; + } + + ENTER_BUFFERED(self) + + /* Fast path: the data to write can be fully buffered. */ + if (!VALID_READ_BUFFER(self) && !VALID_WRITE_BUFFER(self)) { + self->pos = 0; + self->raw_pos = 0; + } + avail = Py_SAFE_DOWNCAST(self->buffer_size - self->pos, Py_off_t, Py_ssize_t); + if (buf.len <= avail) { + memcpy(self->buffer + self->pos, buf.buf, buf.len); + if (!VALID_WRITE_BUFFER(self)) { + self->write_pos = self->pos; + } + ADJUST_POSITION(self, self->pos + buf.len); + if (self->pos > self->write_end) + self->write_end = self->pos; + written = buf.len; + goto end; + } + + /* First write the current buffer */ + res = _bufferedwriter_flush_unlocked(self, 0); + if (res == NULL) { + Py_ssize_t *w = _buffered_check_blocking_error(); + if (w == NULL) + goto error; + if (self->readable) + _bufferedreader_reset_buf(self); + /* Make some place by shifting the buffer. */ + assert(VALID_WRITE_BUFFER(self)); + memmove(self->buffer, self->buffer + self->write_pos, + Py_SAFE_DOWNCAST(self->write_end - self->write_pos, + Py_off_t, Py_ssize_t)); + self->write_end -= self->write_pos; + self->raw_pos -= self->write_pos; + self->pos -= self->write_pos; + self->write_pos = 0; + avail = Py_SAFE_DOWNCAST(self->buffer_size - self->write_end, + Py_off_t, Py_ssize_t); + if (buf.len <= avail) { + /* Everything can be buffered */ + PyErr_Clear(); + memcpy(self->buffer + self->write_end, buf.buf, buf.len); + self->write_end += buf.len; + written = buf.len; + goto end; + } + /* Buffer as much as possible. 
*/ + memcpy(self->buffer + self->write_end, buf.buf, avail); + self->write_end += avail; + /* Already re-raised */ + *w = avail; + goto error; + } + Py_CLEAR(res); + + /* Then write buf itself. At this point the buffer has been emptied. */ + remaining = buf.len; + written = 0; + while (remaining > self->buffer_size) { + n = _bufferedwriter_raw_write( + self, (char *) buf.buf + written, buf.len - written); + if (n == -1) { + Py_ssize_t *w = _buffered_check_blocking_error(); + if (w == NULL) + goto error; + written += *w; + remaining -= *w; + if (remaining > self->buffer_size) { + /* Can't buffer everything, still buffer as much as possible */ + memcpy(self->buffer, + (char *) buf.buf + written, self->buffer_size); + self->raw_pos = 0; + ADJUST_POSITION(self, self->buffer_size); + self->write_end = self->buffer_size; + *w = written + self->buffer_size; + /* Already re-raised */ + goto error; + } + PyErr_Clear(); + break; + } + written += n; + remaining -= n; + } + if (self->readable) + _bufferedreader_reset_buf(self); + if (remaining > 0) { + memcpy(self->buffer, (char *) buf.buf + written, remaining); + written += remaining; + } + self->write_pos = 0; + /* TODO: sanity check (remaining >= 0) */ + self->write_end = remaining; + ADJUST_POSITION(self, remaining); + self->raw_pos = 0; + +end: + res = PyLong_FromSsize_t(written); + +error: + LEAVE_BUFFERED(self) + PyBuffer_Release(&buf); + return res; +} + +static PyMethodDef bufferedwriter_methods[] = { + /* BufferedIOMixin methods */ + {"close", (PyCFunction)buffered_close, METH_NOARGS}, + {"detach", (PyCFunction)buffered_detach, METH_NOARGS}, + {"seekable", (PyCFunction)buffered_seekable, METH_NOARGS}, + {"readable", (PyCFunction)buffered_readable, METH_NOARGS}, + {"writable", (PyCFunction)buffered_writable, METH_NOARGS}, + {"fileno", (PyCFunction)buffered_fileno, METH_NOARGS}, + {"isatty", (PyCFunction)buffered_isatty, METH_NOARGS}, + + {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS}, + {"truncate", 
(PyCFunction)buffered_truncate, METH_VARARGS}, + {"flush", (PyCFunction)buffered_flush, METH_NOARGS}, + {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, + {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, + {NULL, NULL} +}; + +static PyMemberDef bufferedwriter_members[] = { + {"raw", T_OBJECT, offsetof(buffered, raw), 0}, + {NULL} +}; + +static PyGetSetDef bufferedwriter_getset[] = { + {"closed", (getter)buffered_closed_get, NULL, NULL}, + {"name", (getter)buffered_name_get, NULL, NULL}, + {"mode", (getter)buffered_mode_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyBufferedWriter_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedWriter", /*tp_name*/ + sizeof(buffered), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)buffered_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)buffered_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + bufferedwriter_doc, /* tp_doc */ + (traverseproc)buffered_traverse, /* tp_traverse */ + (inquiry)buffered_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(buffered, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + bufferedwriter_methods, /* tp_methods */ + bufferedwriter_members, /* tp_members */ + bufferedwriter_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(buffered, dict), /* tp_dictoffset */ + (initproc)bufferedwriter_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + + +/* + * BufferedRWPair + */ + +PyDoc_STRVAR(bufferedrwpair_doc, + "A buffered reader and writer object together.\n" + "\n" + "A buffered reader object and buffered writer object put together to\n" + "form a 
sequential IO object that can read and write. This is typically\n" + "used with a socket or two-way pipe.\n" + "\n" + "reader and writer are RawIOBase objects that are readable and\n" + "writeable respectively. If the buffer_size is omitted it defaults to\n" + "DEFAULT_BUFFER_SIZE.\n" + ); + +/* XXX The usefulness of this (compared to having two separate IO objects) is + * questionable. + */ + +typedef struct { + PyObject_HEAD + buffered *reader; + buffered *writer; + PyObject *dict; + PyObject *weakreflist; +} rwpair; + +static int +bufferedrwpair_init(rwpair *self, PyObject *args, PyObject *kwds) +{ + PyObject *reader, *writer; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + Py_ssize_t max_buffer_size = -234; + + if (!PyArg_ParseTuple(args, "OO|nn:BufferedRWPair", &reader, &writer, + &buffer_size, &max_buffer_size)) { + return -1; + } + + if (max_buffer_size != -234 && !complain_about_max_buffer_size()) + return -1; + + if (_PyIOBase_check_readable(reader, Py_True) == NULL) + return -1; + if (_PyIOBase_check_writable(writer, Py_True) == NULL) + return -1; + + self->reader = (buffered *) PyObject_CallFunction( + (PyObject *) &PyBufferedReader_Type, "On", reader, buffer_size); + if (self->reader == NULL) + return -1; + + self->writer = (buffered *) PyObject_CallFunction( + (PyObject *) &PyBufferedWriter_Type, "On", writer, buffer_size); + if (self->writer == NULL) { + Py_CLEAR(self->reader); + return -1; + } + + return 0; +} + +static int +bufferedrwpair_traverse(rwpair *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +bufferedrwpair_clear(rwpair *self) +{ + Py_CLEAR(self->reader); + Py_CLEAR(self->writer); + Py_CLEAR(self->dict); + return 0; +} + +static void +bufferedrwpair_dealloc(rwpair *self) +{ + _PyObject_GC_UNTRACK(self); + Py_CLEAR(self->reader); + Py_CLEAR(self->writer); + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *) self); +} + +static PyObject * +_forward_call(buffered *self, const char *name, 
PyObject *args) +{ + PyObject *func = PyObject_GetAttrString((PyObject *)self, name); + PyObject *ret; + + if (func == NULL) { + PyErr_SetString(PyExc_AttributeError, name); + return NULL; + } + + ret = PyObject_CallObject(func, args); + Py_DECREF(func); + return ret; +} + +static PyObject * +bufferedrwpair_read(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, "read", args); +} + +static PyObject * +bufferedrwpair_peek(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, "peek", args); +} + +static PyObject * +bufferedrwpair_read1(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, "read1", args); +} + +static PyObject * +bufferedrwpair_readinto(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, "readinto", args); +} + +static PyObject * +bufferedrwpair_write(rwpair *self, PyObject *args) +{ + return _forward_call(self->writer, "write", args); +} + +static PyObject * +bufferedrwpair_flush(rwpair *self, PyObject *args) +{ + return _forward_call(self->writer, "flush", args); +} + +static PyObject * +bufferedrwpair_readable(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, "readable", args); +} + +static PyObject * +bufferedrwpair_writable(rwpair *self, PyObject *args) +{ + return _forward_call(self->writer, "writable", args); +} + +static PyObject * +bufferedrwpair_close(rwpair *self, PyObject *args) +{ + PyObject *ret = _forward_call(self->writer, "close", args); + if (ret == NULL) + return NULL; + Py_DECREF(ret); + + return _forward_call(self->reader, "close", args); +} + +static PyObject * +bufferedrwpair_isatty(rwpair *self, PyObject *args) +{ + PyObject *ret = _forward_call(self->writer, "isatty", args); + + if (ret != Py_False) { + /* either True or exception */ + return ret; + } + Py_DECREF(ret); + + return _forward_call(self->reader, "isatty", args); +} + +static PyObject * +bufferedrwpair_closed_get(rwpair *self, void *context) +{ + return 
PyObject_GetAttr((PyObject *) self->writer, _PyIO_str_closed); +} + +static PyMethodDef bufferedrwpair_methods[] = { + {"read", (PyCFunction)bufferedrwpair_read, METH_VARARGS}, + {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS}, + {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS}, + {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS}, + + {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS}, + {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS}, + + {"readable", (PyCFunction)bufferedrwpair_readable, METH_NOARGS}, + {"writable", (PyCFunction)bufferedrwpair_writable, METH_NOARGS}, + + {"close", (PyCFunction)bufferedrwpair_close, METH_NOARGS}, + {"isatty", (PyCFunction)bufferedrwpair_isatty, METH_NOARGS}, + + {NULL, NULL} +}; + +static PyGetSetDef bufferedrwpair_getset[] = { + {"closed", (getter)bufferedrwpair_closed_get, NULL, NULL}, + {NULL} +}; + +PyTypeObject PyBufferedRWPair_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedRWPair", /*tp_name*/ + sizeof(rwpair), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)bufferedrwpair_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + bufferedrwpair_doc, /* tp_doc */ + (traverseproc)bufferedrwpair_traverse, /* tp_traverse */ + (inquiry)bufferedrwpair_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(rwpair, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + bufferedrwpair_methods, /* tp_methods */ + 0, /* tp_members */ + bufferedrwpair_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(rwpair, dict), /* tp_dictoffset */ + 
(initproc)bufferedrwpair_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + + +/* + * BufferedRandom + */ + +PyDoc_STRVAR(bufferedrandom_doc, + "A buffered interface to random access streams.\n" + "\n" + "The constructor creates a reader and writer for a seekable stream,\n" + "raw, given in the first argument. If the buffer_size is omitted it\n" + "defaults to DEFAULT_BUFFER_SIZE. max_buffer_size isn't used anymore.\n" + ); + +static int +bufferedrandom_init(buffered *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"raw", "buffer_size", "max_buffer_size", NULL}; + Py_ssize_t buffer_size = DEFAULT_BUFFER_SIZE; + Py_ssize_t max_buffer_size = -234; + PyObject *raw; + + self->ok = 0; + self->detached = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|nn:BufferedReader", kwlist, + &raw, &buffer_size, &max_buffer_size)) { + return -1; + } + + if (max_buffer_size != -234 && !complain_about_max_buffer_size()) + return -1; + + if (_PyIOBase_check_seekable(raw, Py_True) == NULL) + return -1; + if (_PyIOBase_check_readable(raw, Py_True) == NULL) + return -1; + if (_PyIOBase_check_writable(raw, Py_True) == NULL) + return -1; + + Py_CLEAR(self->raw); + Py_INCREF(raw); + self->raw = raw; + self->buffer_size = buffer_size; + self->readable = 1; + self->writable = 1; + + if (_buffered_init(self) < 0) + return -1; + _bufferedreader_reset_buf(self); + _bufferedwriter_reset_buf(self); + self->pos = 0; + + self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedRandom_Type && + Py_TYPE(raw) == &PyFileIO_Type); + + self->ok = 1; + return 0; +} + +static PyMethodDef bufferedrandom_methods[] = { + /* BufferedIOMixin methods */ + {"close", (PyCFunction)buffered_close, METH_NOARGS}, + {"detach", (PyCFunction)buffered_detach, METH_NOARGS}, + {"seekable", (PyCFunction)buffered_seekable, METH_NOARGS}, + {"readable", (PyCFunction)buffered_readable, METH_NOARGS}, + {"writable", (PyCFunction)buffered_writable, METH_NOARGS}, + {"fileno", 
(PyCFunction)buffered_fileno, METH_NOARGS}, + {"isatty", (PyCFunction)buffered_isatty, METH_NOARGS}, + + {"flush", (PyCFunction)buffered_flush, METH_NOARGS}, + + {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, + {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, + {"truncate", (PyCFunction)buffered_truncate, METH_VARARGS}, + {"read", (PyCFunction)buffered_read, METH_VARARGS}, + {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, + {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, + {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, + {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS}, + {NULL, NULL} +}; + +static PyMemberDef bufferedrandom_members[] = { + {"raw", T_OBJECT, offsetof(buffered, raw), 0}, + {NULL} +}; + +static PyGetSetDef bufferedrandom_getset[] = { + {"closed", (getter)buffered_closed_get, NULL, NULL}, + {"name", (getter)buffered_name_get, NULL, NULL}, + {"mode", (getter)buffered_mode_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyBufferedRandom_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.BufferedRandom", /*tp_name*/ + sizeof(buffered), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)buffered_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + (reprfunc)buffered_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + bufferedrandom_doc, /* tp_doc */ + (traverseproc)buffered_traverse, /* tp_traverse */ + (inquiry)buffered_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(buffered, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + (iternextfunc)buffered_iternext, /* tp_iternext */ + bufferedrandom_methods, /* tp_methods */ + bufferedrandom_members, /* tp_members 
*/ + bufferedrandom_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /*tp_dict*/ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(buffered, dict), /*tp_dictoffset*/ + (initproc)bufferedrandom_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + diff --git a/Modules/_bytesio.c b/Modules/_io/bytesio.c index c5c14b3..ed2f7cc 100644 --- a/Modules/_bytesio.c +++ b/Modules/_io/bytesio.c @@ -1,4 +1,6 @@ #include "Python.h" +#include "structmember.h" /* for offsetof() */ +#include "_iomodule.h" typedef struct { PyObject_HEAD @@ -6,7 +8,9 @@ typedef struct { Py_ssize_t pos; Py_ssize_t string_size; size_t buf_size; -} BytesIOObject; + PyObject *dict; + PyObject *weakreflist; +} bytesio; #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ @@ -19,7 +23,7 @@ typedef struct { object. Returns the length between the current position to the next newline character. */ static Py_ssize_t -get_line(BytesIOObject *self, char **output) +get_line(bytesio *self, char **output) { char *n; const char *str_end; @@ -52,7 +56,7 @@ get_line(BytesIOObject *self, char **output) The caller should ensure that the 'size' argument is non-negative. Returns 0 on success, -1 otherwise. */ static int -resize_buffer(BytesIOObject *self, size_t size) +resize_buffer(bytesio *self, size_t size) { /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ @@ -104,7 +108,7 @@ resize_buffer(BytesIOObject *self, size_t size) /* Internal routine for writing a string of bytes to the buffer of a BytesIO object. Returns the number of bytes wrote, or -1 on error. 
*/ static Py_ssize_t -write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len) +write_bytes(bytesio *self, const char *bytes, Py_ssize_t len) { assert(self->buf != NULL); assert(self->pos >= 0); @@ -142,17 +146,19 @@ write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len) } static PyObject * -bytesio_get_closed(BytesIOObject *self) +bytesio_get_closed(bytesio *self) { - if (self->buf == NULL) + if (self->buf == NULL) { Py_RETURN_TRUE; - else + } + else { Py_RETURN_FALSE; + } } /* Generic getter for the writable, readable and seekable properties */ static PyObject * -return_true(BytesIOObject *self) +return_true(bytesio *self) { Py_RETURN_TRUE; } @@ -161,7 +167,7 @@ PyDoc_STRVAR(flush_doc, "flush() -> None. Does nothing."); static PyObject * -bytesio_flush(BytesIOObject *self) +bytesio_flush(bytesio *self) { Py_RETURN_NONE; } @@ -172,10 +178,10 @@ PyDoc_STRVAR(getval_doc, "Retrieve the entire contents of the BytesIO object."); static PyObject * -bytesio_getvalue(BytesIOObject *self) +bytesio_getvalue(bytesio *self) { CHECK_CLOSED(self); - return PyString_FromStringAndSize(self->buf, self->string_size); + return PyBytes_FromStringAndSize(self->buf, self->string_size); } PyDoc_STRVAR(isatty_doc, @@ -185,7 +191,7 @@ PyDoc_STRVAR(isatty_doc, "to a tty-like device."); static PyObject * -bytesio_isatty(BytesIOObject *self) +bytesio_isatty(bytesio *self) { CHECK_CLOSED(self); Py_RETURN_FALSE; @@ -195,10 +201,10 @@ PyDoc_STRVAR(tell_doc, "tell() -> current file position, an integer\n"); static PyObject * -bytesio_tell(BytesIOObject *self) +bytesio_tell(bytesio *self) { CHECK_CLOSED(self); - return PyInt_FromSsize_t(self->pos); + return PyLong_FromSsize_t(self->pos); } PyDoc_STRVAR(read_doc, @@ -208,7 +214,7 @@ PyDoc_STRVAR(read_doc, "Return an empty string at EOF."); static PyObject * -bytesio_read(BytesIOObject *self, PyObject *args) +bytesio_read(bytesio *self, PyObject *args) { Py_ssize_t size, n; char *output; @@ -219,8 +225,8 @@ 
bytesio_read(BytesIOObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "|O:read", &arg)) return NULL; - if (PyInt_Check(arg)) { - size = PyInt_AsSsize_t(arg); + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); if (size == -1 && PyErr_Occurred()) return NULL; } @@ -246,7 +252,7 @@ bytesio_read(BytesIOObject *self, PyObject *args) output = self->buf + self->pos; self->pos += size; - return PyString_FromStringAndSize(output, size); + return PyBytes_FromStringAndSize(output, size); } @@ -257,7 +263,7 @@ PyDoc_STRVAR(read1_doc, "Return an empty string at EOF."); static PyObject * -bytesio_read1(BytesIOObject *self, PyObject *n) +bytesio_read1(bytesio *self, PyObject *n) { PyObject *arg, *res; @@ -277,7 +283,7 @@ PyDoc_STRVAR(readline_doc, "Return an empty string at EOF.\n"); static PyObject * -bytesio_readline(BytesIOObject *self, PyObject *args) +bytesio_readline(bytesio *self, PyObject *args) { Py_ssize_t size, n; char *output; @@ -288,8 +294,8 @@ bytesio_readline(BytesIOObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "|O:readline", &arg)) return NULL; - if (PyInt_Check(arg)) { - size = PyInt_AsSsize_t(arg); + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); if (size == -1 && PyErr_Occurred()) return NULL; } @@ -311,7 +317,7 @@ bytesio_readline(BytesIOObject *self, PyObject *args) self->pos -= size; } - return PyString_FromStringAndSize(output, n); + return PyBytes_FromStringAndSize(output, n); } PyDoc_STRVAR(readlines_doc, @@ -322,7 +328,7 @@ PyDoc_STRVAR(readlines_doc, "total number of bytes in the lines returned.\n"); static PyObject * -bytesio_readlines(BytesIOObject *self, PyObject *args) +bytesio_readlines(bytesio *self, PyObject *args) { Py_ssize_t maxsize, size, n; PyObject *result, *line; @@ -334,8 +340,8 @@ bytesio_readlines(BytesIOObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "|O:readlines", &arg)) return NULL; - if (PyInt_Check(arg)) { - maxsize = 
PyInt_AsSsize_t(arg); + if (PyNumber_Check(arg)) { + maxsize = PyNumber_AsSsize_t(arg, PyExc_OverflowError); if (maxsize == -1 && PyErr_Occurred()) return NULL; } @@ -355,7 +361,7 @@ bytesio_readlines(BytesIOObject *self, PyObject *args) return NULL; while ((n = get_line(self, &output)) != 0) { - line = PyString_FromStringAndSize(output, n); + line = PyBytes_FromStringAndSize(output, n); if (!line) goto on_error; if (PyList_Append(result, line) == -1) { @@ -381,25 +387,27 @@ PyDoc_STRVAR(readinto_doc, "is set not to block as has no data to read."); static PyObject * -bytesio_readinto(BytesIOObject *self, PyObject *buffer) +bytesio_readinto(bytesio *self, PyObject *args) { - void *raw_buffer; + Py_buffer buf; Py_ssize_t len; CHECK_CLOSED(self); - if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1) + if (!PyArg_ParseTuple(args, "w*", &buf)) return NULL; + len = buf.len; if (self->pos + len > self->string_size) len = self->string_size - self->pos; - memcpy(raw_buffer, self->buf + self->pos, len); + memcpy(buf.buf, self->buf + self->pos, len); assert(self->pos + len < PY_SSIZE_T_MAX); assert(len >= 0); self->pos += len; - return PyInt_FromSsize_t(len); + PyBuffer_Release(&buf); + return PyLong_FromSsize_t(len); } PyDoc_STRVAR(truncate_doc, @@ -409,7 +417,7 @@ PyDoc_STRVAR(truncate_doc, "Returns the new size. 
Imply an absolute seek to the position size."); static PyObject * -bytesio_truncate(BytesIOObject *self, PyObject *args) +bytesio_truncate(bytesio *self, PyObject *args) { Py_ssize_t size; PyObject *arg = Py_None; @@ -419,8 +427,8 @@ bytesio_truncate(BytesIOObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) return NULL; - if (PyInt_Check(arg)) { - size = PyInt_AsSsize_t(arg); + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); if (size == -1 && PyErr_Occurred()) return NULL; } @@ -447,11 +455,11 @@ bytesio_truncate(BytesIOObject *self, PyObject *args) } self->pos = size; - return PyInt_FromSsize_t(size); + return PyLong_FromSsize_t(size); } static PyObject * -bytesio_iternext(BytesIOObject *self) +bytesio_iternext(bytesio *self) { char *next; Py_ssize_t n; @@ -463,7 +471,7 @@ bytesio_iternext(BytesIOObject *self) if (!next || n == 0) return NULL; - return PyString_FromStringAndSize(next, n); + return PyBytes_FromStringAndSize(next, n); } PyDoc_STRVAR(seek_doc, @@ -476,27 +484,21 @@ PyDoc_STRVAR(seek_doc, "Returns the new absolute position."); static PyObject * -bytesio_seek(BytesIOObject *self, PyObject *args) +bytesio_seek(bytesio *self, PyObject *args) { - PyObject *pos_obj, *mode_obj; + PyObject *posobj; Py_ssize_t pos; int mode = 0; CHECK_CLOSED(self); - /* Special-case for 2.x to prevent floats from passing through. - This only needed to make a test in test_io succeed. 
*/ - if (!PyArg_UnpackTuple(args, "seek", 1, 2, &pos_obj, &mode_obj)) + if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode)) return NULL; - if (PyFloat_Check(pos_obj)) { - PyErr_SetString(PyExc_TypeError, - "position argument must be an integer"); - return NULL; - } - if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) + pos = PyNumber_AsSsize_t(posobj, PyExc_OverflowError); + if (pos == -1 && PyErr_Occurred()) return NULL; - + if (pos < 0 && mode == 0) { PyErr_Format(PyExc_ValueError, "negative seek value %zd", pos); @@ -532,7 +534,7 @@ bytesio_seek(BytesIOObject *self, PyObject *args) pos = 0; self->pos = pos; - return PyInt_FromSsize_t(self->pos); + return PyLong_FromSsize_t(self->pos); } PyDoc_STRVAR(write_doc, @@ -541,31 +543,24 @@ PyDoc_STRVAR(write_doc, "Return the number of bytes written."); static PyObject * -bytesio_write(BytesIOObject *self, PyObject *obj) +bytesio_write(bytesio *self, PyObject *obj) { - const char *bytes; - Py_ssize_t size; Py_ssize_t n = 0; + Py_buffer buf; + PyObject *result = NULL; CHECK_CLOSED(self); - /* Special-case in 2.x to prevent unicode objects to pass through. */ - if (PyUnicode_Check(obj)) { - PyErr_SetString(PyExc_TypeError, - "expecting a bytes object, got unicode"); + if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) return NULL; - } - if (PyObject_AsReadBuffer(obj, (void *)&bytes, &size) < 0) - return NULL; + if (buf.len != 0) + n = write_bytes(self, buf.buf, buf.len); + if (n >= 0) + result = PyLong_FromSsize_t(n); - if (size != 0) { - n = write_bytes(self, bytes, size); - if (n < 0) - return NULL; - } - - return PyInt_FromSsize_t(n); + PyBuffer_Release(&buf); + return result; } PyDoc_STRVAR(writelines_doc, @@ -576,7 +571,7 @@ PyDoc_STRVAR(writelines_doc, "each string."); static PyObject * -bytesio_writelines(BytesIOObject *self, PyObject *v) +bytesio_writelines(bytesio *self, PyObject *v) { PyObject *it, *item; PyObject *ret; @@ -609,7 +604,7 @@ PyDoc_STRVAR(close_doc, "close() -> None. 
Disable all I/O operations."); static PyObject * -bytesio_close(BytesIOObject *self) +bytesio_close(bytesio *self) { if (self->buf != NULL) { PyMem_Free(self->buf); @@ -619,22 +614,23 @@ bytesio_close(BytesIOObject *self) } static void -bytesio_dealloc(BytesIOObject *self) +bytesio_dealloc(bytesio *self) { if (self->buf != NULL) { PyMem_Free(self->buf); self->buf = NULL; } + Py_TYPE(self)->tp_clear((PyObject *)self); Py_TYPE(self)->tp_free(self); } static PyObject * bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - BytesIOObject *self; + bytesio *self; assert(type != NULL && type->tp_alloc != NULL); - self = (BytesIOObject *)type->tp_alloc(type, 0); + self = (bytesio *)type->tp_alloc(type, 0); if (self == NULL) return NULL; @@ -651,7 +647,7 @@ bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } static int -bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds) +bytesio_init(bytesio *self, PyObject *args, PyObject *kwds) { PyObject *initvalue = NULL; @@ -674,10 +670,28 @@ bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds) return 0; } +static int +bytesio_traverse(bytesio *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + Py_VISIT(self->weakreflist); + return 0; +} + +static int +bytesio_clear(bytesio *self) +{ + Py_CLEAR(self->dict); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *)self); + return 0; +} + + static PyGetSetDef bytesio_getsetlist[] = { {"closed", (getter)bytesio_get_closed, NULL, "True if the file is closed."}, - {0}, /* sentinel */ + {NULL}, /* sentinel */ }; static struct PyMethodDef bytesio_methods[] = { @@ -691,7 +705,7 @@ static struct PyMethodDef bytesio_methods[] = { {"write", (PyCFunction)bytesio_write, METH_O, write_doc}, {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc}, {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc}, - {"readinto", (PyCFunction)bytesio_readinto, METH_O, readinto_doc}, + {"readinto", 
(PyCFunction)bytesio_readinto, METH_VARARGS, readinto_doc}, {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, @@ -707,16 +721,16 @@ PyDoc_STRVAR(bytesio_doc, "Create a buffered I/O implementation using an in-memory bytes\n" "buffer, ready for reading and writing."); -static PyTypeObject BytesIO_Type = { +PyTypeObject PyBytesIO_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "_bytesio._BytesIO", /*tp_name*/ - sizeof(BytesIOObject), /*tp_basicsize*/ + "_io.BytesIO", /*tp_name*/ + sizeof(bytesio), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)bytesio_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ - 0, /*tp_compare*/ + 0, /*tp_reserved*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ @@ -727,12 +741,13 @@ static PyTypeObject BytesIO_Type = { 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HAVE_GC, /*tp_flags*/ bytesio_doc, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ + (traverseproc)bytesio_traverse, /*tp_traverse*/ + (inquiry)bytesio_clear, /*tp_clear*/ 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ + offsetof(bytesio, weakreflist), /*tp_weaklistoffset*/ PyObject_SelfIter, /*tp_iter*/ (iternextfunc)bytesio_iternext, /*tp_iternext*/ bytesio_methods, /*tp_methods*/ @@ -742,22 +757,8 @@ static PyTypeObject BytesIO_Type = { 0, /*tp_dict*/ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ + offsetof(bytesio, dict), /*tp_dictoffset*/ (initproc)bytesio_init, /*tp_init*/ 0, /*tp_alloc*/ bytesio_new, /*tp_new*/ }; - -PyMODINIT_FUNC -init_bytesio(void) -{ - PyObject *m; - - if (PyType_Ready(&BytesIO_Type) < 0) - return; - m = Py_InitModule("_bytesio", NULL); - if (m == NULL) - return; - Py_INCREF(&BytesIO_Type); - PyModule_AddObject(m, 
"_BytesIO", (PyObject *)&BytesIO_Type); -} diff --git a/Modules/_fileio.c b/Modules/_io/fileio.c index efe0855..917ad63 100644 --- a/Modules/_fileio.c +++ b/Modules/_io/fileio.c @@ -6,6 +6,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <stddef.h> /* For offsetof */ +#include "_iomodule.h" /* * Known likely problems: @@ -27,6 +28,20 @@ #include <windows.h> #endif +#if BUFSIZ < (8*1024) +#define SMALLCHUNK (8*1024) +#elif (BUFSIZ >= (2 << 25)) +#error "unreasonable BUFSIZ > 64MB defined" +#else +#define SMALLCHUNK BUFSIZ +#endif + +#if SIZEOF_INT < 4 +#define BIGCHUNK (512 * 32) +#else +#define BIGCHUNK (512 * 1024) +#endif + typedef struct { PyObject_HEAD int fd; @@ -35,55 +50,76 @@ typedef struct { int seekable : 2; /* -1 means unknown */ int closefd : 1; PyObject *weakreflist; -} PyFileIOObject; + PyObject *dict; +} fileio; PyTypeObject PyFileIO_Type; #define PyFileIO_Check(op) (PyObject_TypeCheck((op), &PyFileIO_Type)) +int +_PyFileIO_closed(PyObject *self) +{ + return ((fileio *)self)->fd < 0; +} + static PyObject * portable_lseek(int fd, PyObject *posobj, int whence); -/* Returns 0 on success, errno (which is < 0) on failure. */ +static PyObject *portable_lseek(int fd, PyObject *posobj, int whence); + +/* Returns 0 on success, -1 with exception set on failure. 
*/ static int -internal_close(PyFileIOObject *self) +internal_close(fileio *self) { + int err = 0; int save_errno = 0; if (self->fd >= 0) { int fd = self->fd; self->fd = -1; - Py_BEGIN_ALLOW_THREADS - if (close(fd) < 0) + /* fd is accessible and someone else may have closed it */ + if (_PyVerify_fd(fd)) { + Py_BEGIN_ALLOW_THREADS + err = close(fd); + if (err < 0) + save_errno = errno; + Py_END_ALLOW_THREADS + } else { save_errno = errno; - Py_END_ALLOW_THREADS + err = -1; + } } - return save_errno; + if (err < 0) { + errno = save_errno; + PyErr_SetFromErrno(PyExc_IOError); + return -1; + } + return 0; } static PyObject * -fileio_close(PyFileIOObject *self) +fileio_close(fileio *self) { if (!self->closefd) { self->fd = -1; Py_RETURN_NONE; } errno = internal_close(self); - if (errno < 0) { - PyErr_SetFromErrno(PyExc_IOError); + if (errno < 0) return NULL; - } - Py_RETURN_NONE; + return PyObject_CallMethod((PyObject*)&PyRawIOBase_Type, + "close", "O", self); } static PyObject * -fileio_new(PyTypeObject *type, PyObject *args, PyObject *kews) +fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - PyFileIOObject *self; + fileio *self; assert(type != NULL && type->tp_alloc != NULL); - self = (PyFileIOObject *) type->tp_alloc(type, 0); + self = (fileio *) type->tp_alloc(type, 0); if (self != NULL) { self->fd = -1; self->readable = 0; @@ -101,7 +137,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kews) directories, so we need a check. 
*/ static int -dircheck(PyFileIOObject* self, char *name) +dircheck(fileio* self, const char *name) { #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR) struct stat buf; @@ -110,7 +146,8 @@ dircheck(PyFileIOObject* self, char *name) if (fstat(self->fd, &buf) == 0 && S_ISDIR(buf.st_mode)) { char *msg = strerror(EISDIR); PyObject *exc; - internal_close(self); + if (internal_close(self)) + return -1; exc = PyObject_CallFunction(PyExc_IOError, "(iss)", EISDIR, msg, name); @@ -144,9 +181,10 @@ check_fd(int fd) static int fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) { - PyFileIOObject *self = (PyFileIOObject *) oself; + fileio *self = (fileio *) oself; static char *kwlist[] = {"file", "mode", "closefd", NULL}; - char *name = NULL; + const char *name = NULL; + PyObject *nameobj, *stringobj = NULL; char *mode = "r"; char *s; #ifdef MS_WINDOWS @@ -165,42 +203,59 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) return -1; } - if (PyArg_ParseTupleAndKeywords(args, kwds, "i|si:fileio", - kwlist, &fd, &mode, &closefd)) { - if (fd < 0) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:fileio", + kwlist, &nameobj, &mode, &closefd)) + return -1; + + if (PyFloat_Check(nameobj)) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float"); + return -1; + } + + fd = PyLong_AsLong(nameobj); + if (fd < 0) { + if (!PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, "Negative filedescriptor"); return -1; } - if (check_fd(fd)) - return -1; - } - else { PyErr_Clear(); + } #ifdef MS_WINDOWS - if (GetVersion() < 0x80000000) { + if (GetVersion() < 0x80000000) { /* On NT, so wide API available */ - PyObject *po; - if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:fileio", - kwlist, &po, &mode, &closefd) - ) { - widename = PyUnicode_AS_UNICODE(po); - } else { - /* Drop the argument parsing error as narrow - strings are also valid. 
*/ - PyErr_Clear(); - } - } - if (widename == NULL) + if (PyUnicode_Check(nameobj)) + widename = PyUnicode_AS_UNICODE(nameobj); + } + if (widename == NULL) #endif - { - if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:fileio", - kwlist, - Py_FileSystemDefaultEncoding, - &name, &mode, &closefd)) - return -1; - } + if (fd < 0) + { + if (PyBytes_Check(nameobj) || PyByteArray_Check(nameobj)) { + Py_ssize_t namelen; + if (PyObject_AsCharBuffer(nameobj, &name, &namelen) < 0) + return -1; + } + else { + PyObject *u = PyUnicode_FromObject(nameobj); + + if (u == NULL) + return -1; + + stringobj = PyUnicode_AsEncodedString( + u, Py_FileSystemDefaultEncoding, "surrogateescape"); + Py_DECREF(u); + if (stringobj == NULL) + return -1; + if (!PyBytes_Check(stringobj)) { + PyErr_SetString(PyExc_TypeError, + "encoder failed to return bytes"); + goto error; + } + name = PyBytes_AS_STRING(stringobj); + } } s = mode; @@ -266,6 +321,8 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) #endif if (fd >= 0) { + if (check_fd(fd)) + goto error; self->fd = fd; self->closefd = closefd; } @@ -299,6 +356,9 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) goto error; } + if (PyObject_SetAttrString((PyObject *)self, "name", nameobj) < 0) + goto error; + if (append) { /* For consistent behaviour, we explicitly seek to the end of file (otherwise, it might be done only on the @@ -315,24 +375,33 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) ret = -1; done: - PyMem_Free(name); + Py_CLEAR(stringobj); return ret; } +static int +fileio_traverse(fileio *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +fileio_clear(fileio *self) +{ + Py_CLEAR(self->dict); + return 0; +} + static void -fileio_dealloc(PyFileIOObject *self) +fileio_dealloc(fileio *self) { + if (_PyIOBase_finalize((PyObject *) self) < 0) + return; + _PyObject_GC_UNTRACK(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); - - 
if (self->fd >= 0 && self->closefd) { - errno = internal_close(self); - if (errno < 0) { - PySys_WriteStderr("close failed: [Errno %d] %s\n", - errno, strerror(errno)); - } - } - + Py_CLEAR(self->dict); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -351,7 +420,7 @@ err_mode(char *action) } static PyObject * -fileio_fileno(PyFileIOObject *self) +fileio_fileno(fileio *self) { if (self->fd < 0) return err_closed(); @@ -359,7 +428,7 @@ fileio_fileno(PyFileIOObject *self) } static PyObject * -fileio_readable(PyFileIOObject *self) +fileio_readable(fileio *self) { if (self->fd < 0) return err_closed(); @@ -367,7 +436,7 @@ fileio_readable(PyFileIOObject *self) } static PyObject * -fileio_writable(PyFileIOObject *self) +fileio_writable(fileio *self) { if (self->fd < 0) return err_closed(); @@ -375,25 +444,25 @@ fileio_writable(PyFileIOObject *self) } static PyObject * -fileio_seekable(PyFileIOObject *self) +fileio_seekable(fileio *self) { if (self->fd < 0) return err_closed(); if (self->seekable < 0) { - int ret; - Py_BEGIN_ALLOW_THREADS - ret = lseek(self->fd, 0, SEEK_CUR); - Py_END_ALLOW_THREADS - if (ret < 0) + PyObject *pos = portable_lseek(self->fd, NULL, SEEK_CUR); + if (pos == NULL) { + PyErr_Clear(); self->seekable = 0; - else + } else { + Py_DECREF(pos); self->seekable = 1; + } } return PyBool_FromLong((long) self->seekable); } static PyObject * -fileio_readinto(PyFileIOObject *self, PyObject *args) +fileio_readinto(fileio *self, PyObject *args) { Py_buffer pbuf; Py_ssize_t n; @@ -406,10 +475,13 @@ fileio_readinto(PyFileIOObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "w*", &pbuf)) return NULL; - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = read(self->fd, pbuf.buf, pbuf.len); - Py_END_ALLOW_THREADS + if (_PyVerify_fd(self->fd)) { + Py_BEGIN_ALLOW_THREADS + errno = 0; + n = read(self->fd, pbuf.buf, pbuf.len); + Py_END_ALLOW_THREADS + } else + n = -1; PyBuffer_Release(&pbuf); if (n < 0) { if (errno == EAGAIN) @@ -421,23 +493,62 @@ 
fileio_readinto(PyFileIOObject *self, PyObject *args) return PyLong_FromSsize_t(n); } -#define DEFAULT_BUFFER_SIZE (8*1024) +static size_t +new_buffersize(fileio *self, size_t currentsize) +{ +#ifdef HAVE_FSTAT + off_t pos, end; + struct stat st; + if (fstat(self->fd, &st) == 0) { + end = st.st_size; + pos = lseek(self->fd, 0L, SEEK_CUR); + /* Files claiming a size smaller than SMALLCHUNK may + actually be streaming pseudo-files. In this case, we + apply the more aggressive algorithm below. + */ + if (end >= SMALLCHUNK && end >= pos && pos >= 0) { + /* Add 1 so if the file were to grow we'd notice. */ + return currentsize + end - pos + 1; + } + } +#endif + if (currentsize > SMALLCHUNK) { + /* Keep doubling until we reach BIGCHUNK; + then keep adding BIGCHUNK. */ + if (currentsize <= BIGCHUNK) + return currentsize + currentsize; + else + return currentsize + BIGCHUNK; + } + return currentsize + SMALLCHUNK; +} static PyObject * -fileio_readall(PyFileIOObject *self) +fileio_readall(fileio *self) { PyObject *result; Py_ssize_t total = 0; int n; - result = PyString_FromStringAndSize(NULL, DEFAULT_BUFFER_SIZE); + if (!_PyVerify_fd(self->fd)) + return PyErr_SetFromErrno(PyExc_IOError); + + result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK); if (result == NULL) return NULL; while (1) { - Py_ssize_t newsize = total + DEFAULT_BUFFER_SIZE; - if (PyString_GET_SIZE(result) < newsize) { - if (_PyString_Resize(&result, newsize) < 0) { + size_t newsize = new_buffersize(self, total); + if (newsize > PY_SSIZE_T_MAX || newsize <= 0) { + PyErr_SetString(PyExc_OverflowError, + "unbounded read returned more bytes " + "than a Python string can hold "); + Py_DECREF(result); + return NULL; + } + + if (PyBytes_GET_SIZE(result) < (Py_ssize_t)newsize) { + if (_PyBytes_Resize(&result, newsize) < 0) { if (total == 0) { Py_DECREF(result); return NULL; @@ -449,7 +560,7 @@ fileio_readall(PyFileIOObject *self) Py_BEGIN_ALLOW_THREADS errno = 0; n = read(self->fd, - PyString_AS_STRING(result) + 
total, + PyBytes_AS_STRING(result) + total, newsize - total); Py_END_ALLOW_THREADS if (n == 0) @@ -468,8 +579,8 @@ fileio_readall(PyFileIOObject *self) total += n; } - if (PyString_GET_SIZE(result) > total) { - if (_PyString_Resize(&result, total) < 0) { + if (PyBytes_GET_SIZE(result) > total) { + if (_PyBytes_Resize(&result, total) < 0) { /* This should never happen, but just in case */ Py_DECREF(result); return NULL; @@ -479,7 +590,7 @@ fileio_readall(PyFileIOObject *self) } static PyObject * -fileio_read(PyFileIOObject *self, PyObject *args) +fileio_read(fileio *self, PyObject *args) { char *ptr; Py_ssize_t n; @@ -498,17 +609,21 @@ fileio_read(PyFileIOObject *self, PyObject *args) return fileio_readall(self); } - bytes = PyString_FromStringAndSize(NULL, size); + bytes = PyBytes_FromStringAndSize(NULL, size); if (bytes == NULL) return NULL; - ptr = PyString_AS_STRING(bytes); + ptr = PyBytes_AS_STRING(bytes); - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = read(self->fd, ptr, size); - Py_END_ALLOW_THREADS + if (_PyVerify_fd(self->fd)) { + Py_BEGIN_ALLOW_THREADS + errno = 0; + n = read(self->fd, ptr, size); + Py_END_ALLOW_THREADS + } else + n = -1; if (n < 0) { + Py_DECREF(bytes); if (errno == EAGAIN) Py_RETURN_NONE; PyErr_SetFromErrno(PyExc_IOError); @@ -516,7 +631,7 @@ fileio_read(PyFileIOObject *self, PyObject *args) } if (n != size) { - if (_PyString_Resize(&bytes, n) < 0) { + if (_PyBytes_Resize(&bytes, n) < 0) { Py_DECREF(bytes); return NULL; } @@ -526,7 +641,7 @@ fileio_read(PyFileIOObject *self, PyObject *args) } static PyObject * -fileio_write(PyFileIOObject *self, PyObject *args) +fileio_write(fileio *self, PyObject *args) { Py_buffer pbuf; Py_ssize_t n; @@ -539,10 +654,13 @@ fileio_write(PyFileIOObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s*", &pbuf)) return NULL; - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = write(self->fd, pbuf.buf, pbuf.len); - Py_END_ALLOW_THREADS + if (_PyVerify_fd(self->fd)) { + Py_BEGIN_ALLOW_THREADS + errno = 0; + n = 
write(self->fd, pbuf.buf, pbuf.len); + Py_END_ALLOW_THREADS + } else + n = -1; PyBuffer_Release(&pbuf); @@ -558,12 +676,6 @@ fileio_write(PyFileIOObject *self, PyObject *args) /* XXX Windows support below is likely incomplete */ -#if defined(MS_WIN64) || defined(MS_WINDOWS) -typedef PY_LONG_LONG Py_off_t; -#else -typedef off_t Py_off_t; -#endif - /* Cribbed from posix_lseek() */ static PyObject * portable_lseek(int fd, PyObject *posobj, int whence) @@ -601,13 +713,16 @@ portable_lseek(int fd, PyObject *posobj, int whence) return NULL; } - Py_BEGIN_ALLOW_THREADS + if (_PyVerify_fd(fd)) { + Py_BEGIN_ALLOW_THREADS #if defined(MS_WIN64) || defined(MS_WINDOWS) - res = _lseeki64(fd, pos, whence); + res = _lseeki64(fd, pos, whence); #else - res = lseek(fd, pos, whence); + res = lseek(fd, pos, whence); #endif - Py_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + } else + res = -1; if (res < 0) return PyErr_SetFromErrno(PyExc_IOError); @@ -619,7 +734,7 @@ portable_lseek(int fd, PyObject *posobj, int whence) } static PyObject * -fileio_seek(PyFileIOObject *self, PyObject *args) +fileio_seek(fileio *self, PyObject *args) { PyObject *posobj; int whence = 0; @@ -634,7 +749,7 @@ fileio_seek(PyFileIOObject *self, PyObject *args) } static PyObject * -fileio_tell(PyFileIOObject *self, PyObject *args) +fileio_tell(fileio *self, PyObject *args) { if (self->fd < 0) return err_closed(); @@ -644,7 +759,7 @@ fileio_tell(PyFileIOObject *self, PyObject *args) #ifdef HAVE_FTRUNCATE static PyObject * -fileio_truncate(PyFileIOObject *self, PyObject *args) +fileio_truncate(fileio *self, PyObject *args) { PyObject *posobj = NULL; Py_off_t pos; @@ -670,13 +785,15 @@ fileio_truncate(PyFileIOObject *self, PyObject *args) /* Move to the position to be truncated. 
*/ posobj = portable_lseek(fd, posobj, 0); } + if (posobj == NULL) + return NULL; #if defined(HAVE_LARGEFILE_SUPPORT) pos = PyLong_AsLongLong(posobj); #else pos = PyLong_AsLong(posobj); #endif - if (PyErr_Occurred()) + if (pos == -1 && PyErr_Occurred()) return NULL; #ifdef MS_WINDOWS @@ -714,7 +831,7 @@ fileio_truncate(PyFileIOObject *self, PyObject *args) #endif static char * -mode_string(PyFileIOObject *self) +mode_string(fileio *self) { if (self->readable) { if (self->writable) @@ -727,17 +844,37 @@ mode_string(PyFileIOObject *self) } static PyObject * -fileio_repr(PyFileIOObject *self) +fileio_repr(fileio *self) { + PyObject *nameobj, *res; + if (self->fd < 0) - return PyString_FromFormat("_fileio._FileIO(-1)"); + return PyString_FromFormat("<_io.FileIO [closed]>"); - return PyString_FromFormat("_fileio._FileIO(%d, '%s')", - self->fd, mode_string(self)); + nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + if (nameobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + return NULL; + res = PyString_FromFormat("<_io.FileIO fd=%d mode='%s'>", + self->fd, mode_string(self)); + } + else { + PyObject *repr = PyObject_Repr(nameobj); + Py_DECREF(nameobj); + if (repr == NULL) + return NULL; + res = PyString_FromFormat("<_io.FileIO name=%s mode='%s'>", + PyString_AS_STRING(repr), + mode_string(self)); + Py_DECREF(repr); + } + return res; } static PyObject * -fileio_isatty(PyFileIOObject *self) +fileio_isatty(fileio *self) { long res; @@ -806,7 +943,7 @@ PyDoc_STRVAR(tell_doc, "tell() -> int. Current file position"); PyDoc_STRVAR(readinto_doc, -"readinto() -> Undocumented. Don't use this; it may go away."); +"readinto() -> Same as RawIOBase.readinto()."); PyDoc_STRVAR(close_doc, "close() -> None. Close the file.\n" @@ -848,21 +985,21 @@ static PyMethodDef fileio_methods[] = { /* 'closed' and 'mode' are attributes for backwards compatibility reasons. 
*/ static PyObject * -get_closed(PyFileIOObject *self, void *closure) +get_closed(fileio *self, void *closure) { return PyBool_FromLong((long)(self->fd < 0)); } static PyObject * -get_closefd(PyFileIOObject *self, void *closure) +get_closefd(fileio *self, void *closure) { return PyBool_FromLong((long)(self->closefd)); } static PyObject * -get_mode(PyFileIOObject *self, void *closure) +get_mode(fileio *self, void *closure) { - return PyString_FromString(mode_string(self)); + return PyUnicode_FromString(mode_string(self)); } static PyGetSetDef fileio_getsetlist[] = { @@ -870,19 +1007,19 @@ static PyGetSetDef fileio_getsetlist[] = { {"closefd", (getter)get_closefd, NULL, "True if the file descriptor will be closed"}, {"mode", (getter)get_mode, NULL, "String giving the file mode"}, - {0}, + {NULL}, }; PyTypeObject PyFileIO_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "_FileIO", - sizeof(PyFileIOObject), + "_io.FileIO", + sizeof(fileio), 0, (destructor)fileio_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ - 0, /* tp_compare */ + 0, /* tp_reserved */ (reprfunc)fileio_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ @@ -893,12 +1030,13 @@ PyTypeObject PyFileIO_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /* tp_flags */ fileio_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ + (traverseproc)fileio_traverse, /* tp_traverse */ + (inquiry)fileio_clear, /* tp_clear */ 0, /* tp_richcompare */ - offsetof(PyFileIOObject, weakreflist), /* tp_weaklistoffset */ + offsetof(fileio, weakreflist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ fileio_methods, /* tp_methods */ @@ -908,28 +1046,9 @@ PyTypeObject PyFileIO_Type = { 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ + offsetof(fileio, dict), 
/* tp_dictoffset */ fileio_init, /* tp_init */ PyType_GenericAlloc, /* tp_alloc */ fileio_new, /* tp_new */ - PyObject_Del, /* tp_free */ + PyObject_GC_Del, /* tp_free */ }; - -static PyMethodDef module_methods[] = { - {NULL, NULL} -}; - -PyMODINIT_FUNC -init_fileio(void) -{ - PyObject *m; /* a module object */ - - m = Py_InitModule3("_fileio", module_methods, - "Fast implementation of io.FileIO."); - if (m == NULL) - return; - if (PyType_Ready(&PyFileIO_Type) < 0) - return; - Py_INCREF(&PyFileIO_Type); - PyModule_AddObject(m, "_FileIO", (PyObject *) &PyFileIO_Type); -} diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c new file mode 100644 index 0000000..38080f7 --- /dev/null +++ b/Modules/_io/iobase.c @@ -0,0 +1,894 @@ +/* + An implementation of the I/O abstract base classes hierarchy + as defined by PEP 3116 - "New I/O" + + Classes defined here: IOBase, RawIOBase. + + Written by Amaury Forgeot d'Arc and Antoine Pitrou +*/ + + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +/* + * IOBase class, an abstract class + */ + +typedef struct { + PyObject_HEAD + + PyObject *dict; + PyObject *weakreflist; +} iobase; + +PyDoc_STRVAR(iobase_doc, + "The abstract base class for all I/O classes, acting on streams of\n" + "bytes. There is no public constructor.\n" + "\n" + "This class provides dummy implementations for many methods that\n" + "derived classes can override selectively; the default implementations\n" + "represent a file that cannot be read, written or seeked.\n" + "\n" + "Even though IOBase does not declare read, readinto, or write because\n" + "their signatures will vary, implementations and clients should\n" + "consider those methods part of the interface. Also, implementations\n" + "may raise a IOError when operations they do not support are called.\n" + "\n" + "The basic type used for binary data read from or written to a file is\n" + "bytes. 
bytearrays are accepted too, and in some cases (such as\n" + "readinto) needed. Text I/O classes work with str data.\n" + "\n" + "Note that calling any method (even inquiries) on a closed stream is\n" + "undefined. Implementations may raise IOError in this case.\n" + "\n" + "IOBase (and its subclasses) support the iterator protocol, meaning\n" + "that an IOBase object can be iterated over yielding the lines in a\n" + "stream.\n" + "\n" + "IOBase also supports the :keyword:`with` statement. In this example,\n" + "fp is closed after the suite of the with statment is complete:\n" + "\n" + "with open('spam.txt', 'r') as fp:\n" + " fp.write('Spam and eggs!')\n"); + +/* Use this macro whenever you want to check the internal `closed` status + of the IOBase object rather than the virtual `closed` attribute as returned + by whatever subclass. */ + +#define IS_CLOSED(self) \ + PyObject_HasAttrString(self, "__IOBase_closed") + +/* Internal methods */ +static PyObject * +iobase_unsupported(const char *message) +{ + PyErr_SetString(_PyIO_unsupported_operation, message); + return NULL; +} + +/* Positionning */ + +PyDoc_STRVAR(iobase_seek_doc, + "Change stream position.\n" + "\n" + "Change the stream position to byte offset offset. offset is\n" + "interpreted relative to the position indicated by whence. 
Values\n" + "for whence are:\n" + "\n" + "* 0 -- start of stream (the default); offset should be zero or positive\n" + "* 1 -- current stream position; offset may be negative\n" + "* 2 -- end of stream; offset is usually negative\n" + "\n" + "Return the new absolute position."); + +static PyObject * +iobase_seek(PyObject *self, PyObject *args) +{ + return iobase_unsupported("seek"); +} + +PyDoc_STRVAR(iobase_tell_doc, + "Return current stream position."); + +static PyObject * +iobase_tell(PyObject *self, PyObject *args) +{ + return PyObject_CallMethod(self, "seek", "ii", 0, 1); +} + +PyDoc_STRVAR(iobase_truncate_doc, + "Truncate file to size bytes.\n" + "\n" + "Size defaults to the current IO position as reported by tell(). Return\n" + "the new size."); + +static PyObject * +iobase_truncate(PyObject *self, PyObject *args) +{ + return iobase_unsupported("truncate"); +} + +/* Flush and close methods */ + +PyDoc_STRVAR(iobase_flush_doc, + "Flush write buffers, if applicable.\n" + "\n" + "This is not implemented for read-only and non-blocking streams.\n"); + +static PyObject * +iobase_flush(PyObject *self, PyObject *args) +{ + /* XXX Should this return the number of bytes written??? */ + if (IS_CLOSED(self)) { + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file."); + return NULL; + } + Py_RETURN_NONE; +} + +PyDoc_STRVAR(iobase_close_doc, + "Flush and close the IO object.\n" + "\n" + "This method has no effect if the file is already closed.\n"); + +static int +iobase_closed(PyObject *self) +{ + PyObject *res; + int closed; + /* This gets the derived attribute, which is *not* __IOBase_closed + in most cases! 
*/ + res = PyObject_GetAttr(self, _PyIO_str_closed); + if (res == NULL) + return 0; + closed = PyObject_IsTrue(res); + Py_DECREF(res); + return closed; +} + +static PyObject * +iobase_closed_get(PyObject *self, void *context) +{ + return PyBool_FromLong(IS_CLOSED(self)); +} + +PyObject * +_PyIOBase_check_closed(PyObject *self, PyObject *args) +{ + if (iobase_closed(self)) { + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file."); + return NULL; + } + if (args == Py_True) + return Py_None; + else + Py_RETURN_NONE; +} + +/* XXX: IOBase thinks it has to maintain its own internal state in + `__IOBase_closed` and call flush() by itself, but it is redundant with + whatever behaviour a non-trivial derived class will implement. */ + +static PyObject * +iobase_close(PyObject *self, PyObject *args) +{ + PyObject *res; + + if (IS_CLOSED(self)) + Py_RETURN_NONE; + + res = PyObject_CallMethodObjArgs(self, _PyIO_str_flush, NULL); + PyObject_SetAttrString(self, "__IOBase_closed", Py_True); + if (res == NULL) { + /* If flush() fails, just give up */ + if (PyErr_ExceptionMatches(PyExc_IOError)) + PyErr_Clear(); + else + return NULL; + } + Py_XDECREF(res); + Py_RETURN_NONE; +} + +/* Finalization and garbage collection support */ + +int +_PyIOBase_finalize(PyObject *self) +{ + PyObject *res; + PyObject *tp, *v, *tb; + int closed = 1; + int is_zombie; + + /* If _PyIOBase_finalize() is called from a destructor, we need to + resurrect the object as calling close() can invoke arbitrary code. */ + is_zombie = (Py_REFCNT(self) == 0); + if (is_zombie) { + ++Py_REFCNT(self); + } + PyErr_Fetch(&tp, &v, &tb); + /* If `closed` doesn't exist or can't be evaluated as bool, then the + object is probably in an unusable state, so ignore. 
*/ + res = PyObject_GetAttr(self, _PyIO_str_closed); + if (res == NULL) + PyErr_Clear(); + else { + closed = PyObject_IsTrue(res); + Py_DECREF(res); + if (closed == -1) + PyErr_Clear(); + } + if (closed == 0) { + res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_close, + NULL); + /* Silencing I/O errors is bad, but printing spurious tracebacks is + equally as bad, and potentially more frequent (because of + shutdown issues). */ + if (res == NULL) + PyErr_Clear(); + else + Py_DECREF(res); + } + PyErr_Restore(tp, v, tb); + if (is_zombie) { + if (--Py_REFCNT(self) != 0) { + /* The object lives again. The following code is taken from + slot_tp_del in typeobject.c. */ + Py_ssize_t refcnt = Py_REFCNT(self); + _Py_NewReference(self); + Py_REFCNT(self) = refcnt; + /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so + * we need to undo that. */ + _Py_DEC_REFTOTAL; + /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object + * chain, so no more to do there. + * If COUNT_ALLOCS, the original decref bumped tp_frees, and + * _Py_NewReference bumped tp_allocs: both of those need to be + * undone. + */ +#ifdef COUNT_ALLOCS + --Py_TYPE(self)->tp_frees; + --Py_TYPE(self)->tp_allocs; +#endif + return -1; + } + } + return 0; +} + +static int +iobase_traverse(iobase *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +iobase_clear(iobase *self) +{ + if (_PyIOBase_finalize((PyObject *) self) < 0) + return -1; + Py_CLEAR(self->dict); + return 0; +} + +/* Destructor */ + +static void +iobase_dealloc(iobase *self) +{ + /* NOTE: since IOBaseObject has its own dict, Python-defined attributes + are still available here for close() to use. + However, if the derived class declares a __slots__, those slots are + already gone. + */ + if (_PyIOBase_finalize((PyObject *) self) < 0) { + /* When called from a heap type's dealloc, the type will be + decref'ed on return (see e.g. subtype_dealloc in typeobject.c). 
*/ + if (PyType_HasFeature(Py_TYPE(self), Py_TPFLAGS_HEAPTYPE)) + Py_INCREF(Py_TYPE(self)); + return; + } + _PyObject_GC_UNTRACK(self); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *) self); +} + +/* Inquiry methods */ + +PyDoc_STRVAR(iobase_seekable_doc, + "Return whether object supports random access.\n" + "\n" + "If False, seek(), tell() and truncate() will raise IOError.\n" + "This method may need to do a test seek()."); + +static PyObject * +iobase_seekable(PyObject *self, PyObject *args) +{ + Py_RETURN_FALSE; +} + +PyObject * +_PyIOBase_check_seekable(PyObject *self, PyObject *args) +{ + PyObject *res = PyObject_CallMethodObjArgs(self, _PyIO_str_seekable, NULL); + if (res == NULL) + return NULL; + if (res != Py_True) { + Py_CLEAR(res); + PyErr_SetString(PyExc_IOError, "File or stream is not seekable."); + return NULL; + } + if (args == Py_True) { + Py_DECREF(res); + } + return res; +} + +PyDoc_STRVAR(iobase_readable_doc, + "Return whether object was opened for reading.\n" + "\n" + "If False, read() will raise IOError."); + +static PyObject * +iobase_readable(PyObject *self, PyObject *args) +{ + Py_RETURN_FALSE; +} + +/* May be called with any object */ +PyObject * +_PyIOBase_check_readable(PyObject *self, PyObject *args) +{ + PyObject *res = PyObject_CallMethodObjArgs(self, _PyIO_str_readable, NULL); + if (res == NULL) + return NULL; + if (res != Py_True) { + Py_CLEAR(res); + PyErr_SetString(PyExc_IOError, "File or stream is not readable."); + return NULL; + } + if (args == Py_True) { + Py_DECREF(res); + } + return res; +} + +PyDoc_STRVAR(iobase_writable_doc, + "Return whether object was opened for writing.\n" + "\n" + "If False, read() will raise IOError."); + +static PyObject * +iobase_writable(PyObject *self, PyObject *args) +{ + Py_RETURN_FALSE; +} + +/* May be called with any object */ +PyObject * +_PyIOBase_check_writable(PyObject *self, PyObject *args) +{ + 
PyObject *res = PyObject_CallMethodObjArgs(self, _PyIO_str_writable, NULL); + if (res == NULL) + return NULL; + if (res != Py_True) { + Py_CLEAR(res); + PyErr_SetString(PyExc_IOError, "File or stream is not writable."); + return NULL; + } + if (args == Py_True) { + Py_DECREF(res); + } + return res; +} + +/* Context manager */ + +static PyObject * +iobase_enter(PyObject *self, PyObject *args) +{ + if (_PyIOBase_check_closed(self, Py_True) == NULL) + return NULL; + + Py_INCREF(self); + return self; +} + +static PyObject * +iobase_exit(PyObject *self, PyObject *args) +{ + return PyObject_CallMethodObjArgs(self, _PyIO_str_close, NULL); +} + +/* Lower-level APIs */ + +/* XXX Should these be present even if unimplemented? */ + +PyDoc_STRVAR(iobase_fileno_doc, + "Returns underlying file descriptor if one exists.\n" + "\n" + "An IOError is raised if the IO object does not use a file descriptor.\n"); + +static PyObject * +iobase_fileno(PyObject *self, PyObject *args) +{ + return iobase_unsupported("fileno"); +} + +PyDoc_STRVAR(iobase_isatty_doc, + "Return whether this is an 'interactive' stream.\n" + "\n" + "Return False if it can't be determined.\n"); + +static PyObject * +iobase_isatty(PyObject *self, PyObject *args) +{ + if (_PyIOBase_check_closed(self, Py_True) == NULL) + return NULL; + Py_RETURN_FALSE; +} + +/* Readline(s) and writelines */ + +PyDoc_STRVAR(iobase_readline_doc, + "Read and return a line from the stream.\n" + "\n" + "If limit is specified, at most limit bytes will be read.\n" + "\n" + "The line terminator is always b'\n' for binary files; for text\n" + "files, the newlines argument to open can be used to select the line\n" + "terminator(s) recognized.\n"); + +static PyObject * +iobase_readline(PyObject *self, PyObject *args) +{ + /* For backwards compatibility, a (slowish) readline(). 
*/ + + Py_ssize_t limit = -1; + int has_peek = 0; + PyObject *buffer, *result; + Py_ssize_t old_size = -1; + + if (!PyArg_ParseTuple(args, "|n:readline", &limit)) { + return NULL; + } + + if (PyObject_HasAttrString(self, "peek")) + has_peek = 1; + + buffer = PyByteArray_FromStringAndSize(NULL, 0); + if (buffer == NULL) + return NULL; + + while (limit < 0 || Py_SIZE(buffer) < limit) { + Py_ssize_t nreadahead = 1; + PyObject *b; + + if (has_peek) { + /* Use peek() to find how many bytes can be read in one call + without going past the next newline. */ + PyObject *readahead = PyObject_CallMethod(self, "peek", "i", 1); + if (readahead == NULL) + goto fail; + if (!PyBytes_Check(readahead)) { + PyErr_Format(PyExc_IOError, + "peek() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(readahead)->tp_name); + Py_DECREF(readahead); + goto fail; + } + if (PyBytes_GET_SIZE(readahead) > 0) { + Py_ssize_t n = 0; + const char *buf = PyBytes_AS_STRING(readahead); + if (limit >= 0) { + do { + if (n >= PyBytes_GET_SIZE(readahead) || n >= limit) + break; + if (buf[n++] == '\n') + break; + } while (1); + } + else { + do { + if (n >= PyBytes_GET_SIZE(readahead)) + break; + if (buf[n++] == '\n') + break; + } while (1); + } + nreadahead = n; + } + Py_DECREF(readahead); + } + + b = PyObject_CallMethod(self, "read", "n", nreadahead); + if (b == NULL) + goto fail; + if (!PyBytes_Check(b)) { + PyErr_Format(PyExc_IOError, + "read() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(b)->tp_name); + Py_DECREF(b); + goto fail; + } + if (PyBytes_GET_SIZE(b) == 0) { + Py_DECREF(b); + break; + } + + old_size = PyByteArray_GET_SIZE(buffer); + /* The resize can fail (e.g. out of memory); copying into the + un-grown buffer would then write past its end. */ + if (PyByteArray_Resize(buffer, old_size + PyBytes_GET_SIZE(b)) < 0) { + Py_DECREF(b); + goto fail; + } + memcpy(PyByteArray_AS_STRING(buffer) + old_size, + PyBytes_AS_STRING(b), PyBytes_GET_SIZE(b)); + + Py_DECREF(b); + + if (PyByteArray_AS_STRING(buffer)[PyByteArray_GET_SIZE(buffer) - 1] == '\n') + break; + } + + result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(buffer), + PyByteArray_GET_SIZE(buffer)); + Py_DECREF(buffer); + return result; + fail: + Py_DECREF(buffer); +
return NULL; +} + +static PyObject * +iobase_iter(PyObject *self) +{ + if (_PyIOBase_check_closed(self, Py_True) == NULL) + return NULL; + + Py_INCREF(self); + return self; +} + +/* tp_iternext: return the next line from readline(), or NULL (without an + exception) once readline() yields an empty object. */ +static PyObject * +iobase_iternext(PyObject *self) +{ + PyObject *line = PyObject_CallMethodObjArgs(self, _PyIO_str_readline, NULL); + + if (line == NULL) + return NULL; + + /* 0 means EOF; a negative size means len() failed and an exception is + already set. In both cases stop and drop the line. */ + if (PyObject_Size(line) <= 0) { + Py_DECREF(line); + return NULL; + } + + return line; +} + +PyDoc_STRVAR(iobase_readlines_doc, + "Return a list of lines from the stream.\n" + "\n" + "hint can be specified to control the number of lines read: no more\n" + "lines will be read if the total size (in bytes/characters) of all\n" + "lines so far exceeds hint."); + +static PyObject * +iobase_readlines(PyObject *self, PyObject *args) +{ + Py_ssize_t hint = -1, length = 0; + PyObject *hintobj = Py_None, *result; + + if (!PyArg_ParseTuple(args, "|O:readlines", &hintobj)) { + return NULL; + } + if (hintobj != Py_None) { + hint = PyNumber_AsSsize_t(hintobj, PyExc_ValueError); + if (hint == -1 && PyErr_Occurred()) + return NULL; + } + + result = PyList_New(0); + if (result == NULL) + return NULL; + + if (hint <= 0) { + /* XXX special-casing this made sense in the Python version in order + to remove the bytecode interpretation overhead, but it could + probably be removed here.
*/ + PyObject *ret = PyObject_CallMethod(result, "extend", "O", self); + if (ret == NULL) { + Py_DECREF(result); + return NULL; + } + Py_DECREF(ret); + return result; + } + + while (1) { + Py_ssize_t line_length; + PyObject *line = PyIter_Next(self); + if (line == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(result); + return NULL; + } + else + break; /* StopIteration raised */ + } + + if (PyList_Append(result, line) < 0) { + Py_DECREF(line); + Py_DECREF(result); + return NULL; + } + line_length = PyObject_Size(line); + Py_DECREF(line); + if (line_length < 0) { + /* len(line) failed; the exception is already set */ + Py_DECREF(result); + return NULL; + } + + /* Same test as "length + line_length > hint", written against the + remaining budget so the running total cannot overflow. */ + if (line_length > hint - length) + break; + length += line_length; + } + return result; +} + +static PyObject * +iobase_writelines(PyObject *self, PyObject *args) +{ + PyObject *lines, *iter, *res; + + if (!PyArg_ParseTuple(args, "O:writelines", &lines)) { + return NULL; + } + + if (_PyIOBase_check_closed(self, Py_True) == NULL) + return NULL; + + iter = PyObject_GetIter(lines); + if (iter == NULL) + return NULL; + + while (1) { + PyObject *line = PyIter_Next(iter); + if (line == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(iter); + return NULL; + } + else + break; /* Stop Iteration */ + } + + res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL); + Py_DECREF(line); + if (res == NULL) { + Py_DECREF(iter); + return NULL; + } + Py_DECREF(res); + } + Py_DECREF(iter); + Py_RETURN_NONE; +} + +static PyMethodDef iobase_methods[] = { + {"seek", iobase_seek, METH_VARARGS, iobase_seek_doc}, + {"tell", iobase_tell, METH_NOARGS, iobase_tell_doc}, + {"truncate", iobase_truncate, METH_VARARGS, iobase_truncate_doc}, + {"flush", iobase_flush, METH_NOARGS, iobase_flush_doc}, + {"close", iobase_close, METH_NOARGS, iobase_close_doc}, + + {"seekable", iobase_seekable, METH_NOARGS, iobase_seekable_doc}, + {"readable", iobase_readable, METH_NOARGS, iobase_readable_doc}, + {"writable", iobase_writable, METH_NOARGS, iobase_writable_doc}, + + {"_checkClosed", _PyIOBase_check_closed, METH_NOARGS}, + {"_checkSeekable", _PyIOBase_check_seekable, METH_NOARGS}, + {"_checkReadable", _PyIOBase_check_readable,
METH_NOARGS}, + {"_checkWritable", _PyIOBase_check_writable, METH_NOARGS}, + + {"fileno", iobase_fileno, METH_NOARGS, iobase_fileno_doc}, + {"isatty", iobase_isatty, METH_NOARGS, iobase_isatty_doc}, + + {"__enter__", iobase_enter, METH_NOARGS}, + {"__exit__", iobase_exit, METH_VARARGS}, + + {"readline", iobase_readline, METH_VARARGS, iobase_readline_doc}, + {"readlines", iobase_readlines, METH_VARARGS, iobase_readlines_doc}, + {"writelines", iobase_writelines, METH_VARARGS}, + + {NULL, NULL} +}; + +static PyGetSetDef iobase_getset[] = { + {"closed", (getter)iobase_closed_get, NULL, NULL}, + {NULL} +}; + + +PyTypeObject PyIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._IOBase", /*tp_name*/ + sizeof(iobase), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)iobase_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + iobase_doc, /* tp_doc */ + (traverseproc)iobase_traverse, /* tp_traverse */ + (inquiry)iobase_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(iobase, weakreflist), /* tp_weaklistoffset */ + iobase_iter, /* tp_iter */ + iobase_iternext, /* tp_iternext */ + iobase_methods, /* tp_methods */ + 0, /* tp_members */ + iobase_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(iobase, dict), /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* + * RawIOBase class, Inherits from IOBase. 
+ */ +PyDoc_STRVAR(rawiobase_doc, + "Base class for raw binary I/O."); + +/* + * The read() method is implemented by calling readinto(); derived classes + * that want to support read() only need to implement readinto() as a + * primitive operation. In general, readinto() can be more efficient than + * read(). + * + * (It would be tempting to also provide an implementation of readinto() in + * terms of read(), in case the latter is a more suitable primitive operation, + * but that would lead to nasty recursion in case a subclass doesn't implement + * either.) +*/ + +static PyObject * +rawiobase_read(PyObject *self, PyObject *args) +{ + Py_ssize_t n = -1; + PyObject *b, *res; + + if (!PyArg_ParseTuple(args, "|n:read", &n)) { + return NULL; + } + + if (n < 0) + return PyObject_CallMethod(self, "readall", NULL); + + /* TODO: allocate a bytes object directly instead and manually construct + a writable memoryview pointing to it. */ + b = PyByteArray_FromStringAndSize(NULL, n); + if (b == NULL) + return NULL; + + res = PyObject_CallMethodObjArgs(self, _PyIO_str_readinto, b, NULL); + /* Propagate errors, and also None, which the io documentation allows + readinto() to return when a non-blocking stream has no data. */ + if (res == NULL || res == Py_None) { + Py_DECREF(b); + return res; + } + + n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); + if (n == -1 && PyErr_Occurred()) { + Py_DECREF(b); + return NULL; + } + /* Do not trust the reported length beyond the buffer we own: a larger + value would make the copy below read past the end of b. */ + if (n < 0 || n > PyByteArray_GET_SIZE(b)) { + PyErr_Format(PyExc_ValueError, + "readinto() returned an invalid length %zd", n); + Py_DECREF(b); + return NULL; + } + + res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), n); + Py_DECREF(b); + return res; +} + + +PyDoc_STRVAR(rawiobase_readall_doc, + "Read until EOF, using multiple read() calls."); + +static PyObject * +rawiobase_readall(PyObject *self, PyObject *args) +{ + int r; + PyObject *chunks = PyList_New(0); + PyObject *result; + + if (chunks == NULL) + return NULL; + + while (1) { + PyObject *data = PyObject_CallMethod(self, "read", + "i", DEFAULT_BUFFER_SIZE); + if (!data) { + Py_DECREF(chunks); + return NULL; + } + if (data == Py_None) { + /* read() reported "no data available right now": return None if + nothing was read at all, otherwise return what we have. */ + if (PyList_GET_SIZE(chunks) == 0) { + Py_DECREF(chunks); + return data; + } + Py_DECREF(data); + break; + } + if (!PyBytes_Check(data)) { + Py_DECREF(chunks); + Py_DECREF(data); + PyErr_SetString(PyExc_TypeError, "read() should return bytes"); + return NULL; + } + if (PyBytes_GET_SIZE(data) == 0)
{ + /* EOF */ + Py_DECREF(data); + break; + } + r = PyList_Append(chunks, data); + Py_DECREF(data); + if (r < 0) { + Py_DECREF(chunks); + return NULL; + } + } + result = _PyBytes_Join(_PyIO_empty_bytes, chunks); + Py_DECREF(chunks); + return result; +} + +static PyMethodDef rawiobase_methods[] = { + {"read", rawiobase_read, METH_VARARGS}, + {"readall", rawiobase_readall, METH_NOARGS, rawiobase_readall_doc}, + {NULL, NULL} +}; + +PyTypeObject PyRawIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._RawIOBase", /*tp_name*/ + 0, /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + rawiobase_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + rawiobase_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + &PyIOBase_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c new file mode 100644 index 0000000..b602ee8 --- /dev/null +++ b/Modules/_io/stringio.c @@ -0,0 +1,756 @@ +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +/* Implementation note: the buffer is always at least one character longer + than the enclosed string, for proper functioning of _PyIO_find_line_ending. +*/ + +typedef struct { + PyObject_HEAD + Py_UNICODE *buf; + Py_ssize_t pos; + Py_ssize_t string_size; + size_t buf_size; + + char ok; /* initialized? 
*/ + char closed; + char readuniversal; + char readtranslate; + PyObject *decoder; + PyObject *readnl; + PyObject *writenl; + + PyObject *dict; + PyObject *weakreflist; +} stringio; + +#define CHECK_INITIALIZED(self) \ + if (self->ok <= 0) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + return NULL; \ + } + +#define CHECK_CLOSED(self) \ + if (self->closed) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on closed file"); \ + return NULL; \ + } + +PyDoc_STRVAR(stringio_doc, + "Text I/O implementation using an in-memory buffer.\n" + "\n" + "The initial_value argument sets the value of object. The newline\n" + "argument is like the one of TextIOWrapper's constructor."); + + +/* Internal routine for changing the size, in terms of characters, of the + buffer of StringIO objects. The caller should ensure that the 'size' + argument is non-negative. Returns 0 on success, -1 otherwise. */ +static int +resize_buffer(stringio *self, size_t size) +{ + /* Here, unsigned types are used to avoid dealing with signed integer + overflow, which is undefined in C. */ + size_t alloc = self->buf_size; + Py_UNICODE *new_buf = NULL; + + assert(self->buf != NULL); + + /* Reserve one more char for line ending detection. */ + size = size + 1; + /* For simplicity, stay in the range of the signed type. Anyway, Python + doesn't allow strings to be longer than this. */ + if (size > PY_SSIZE_T_MAX) + goto overflow; + + if (size < alloc / 2) { + /* Major downsize; resize down to exact size. */ + alloc = size + 1; + } + else if (size < alloc) { + /* Within allocated size; quick exit */ + return 0; + } + else if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 
3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } + + if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) + goto overflow; + new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, + alloc * sizeof(Py_UNICODE)); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = alloc; + self->buf = new_buf; + + return 0; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "new buffer size too large"); + return -1; +} + +/* Internal routine for writing a whole PyUnicode object to the buffer of a + StringIO object. Returns 0 on success, or -1 on error. */ +static Py_ssize_t +write_str(stringio *self, PyObject *obj) +{ + Py_UNICODE *str; + Py_ssize_t len; + PyObject *decoded = NULL; + assert(self->buf != NULL); + assert(self->pos >= 0); + + if (self->decoder != NULL) { + decoded = _PyIncrementalNewlineDecoder_decode( + self->decoder, obj, 1 /* always final */); + } + else { + decoded = obj; + Py_INCREF(decoded); + } + if (self->writenl) { + PyObject *translated = PyUnicode_Replace( + decoded, _PyIO_str_nl, self->writenl, -1); + Py_DECREF(decoded); + decoded = translated; + } + if (decoded == NULL) + return -1; + + assert(PyUnicode_Check(decoded)); + str = PyUnicode_AS_UNICODE(decoded); + len = PyUnicode_GET_SIZE(decoded); + + assert(len >= 0); + + /* This overflow check is not strictly necessary. However, it avoids us to + deal with funky things like comparing an unsigned and a signed + integer. */ + if (self->pos > PY_SSIZE_T_MAX - len) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + goto fail; + } + if (self->pos + len > self->string_size) { + if (resize_buffer(self, self->pos + len) < 0) + goto fail; + } + + if (self->pos > self->string_size) { + /* In case of overseek, pad with null bytes the buffer region between + the end of stream and the current position. 
+ + 0 lo string_size hi + | |<---used--->|<----------available----------->| + | | <--to pad-->|<---to write---> | + 0 buf positon + + */ + memset(self->buf + self->string_size, '\0', + (self->pos - self->string_size) * sizeof(Py_UNICODE)); + } + + /* Copy the data to the internal buffer, overwriting some of the + existing data if self->pos < self->string_size. */ + memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); + self->pos += len; + + /* Set the new length of the internal string if it has changed. */ + if (self->string_size < self->pos) { + self->string_size = self->pos; + } + + Py_DECREF(decoded); + return 0; + +fail: + Py_XDECREF(decoded); + return -1; +} + +PyDoc_STRVAR(stringio_getvalue_doc, + "Retrieve the entire contents of the object."); + +static PyObject * +stringio_getvalue(stringio *self) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + return PyUnicode_FromUnicode(self->buf, self->string_size); +} + +PyDoc_STRVAR(stringio_tell_doc, + "Tell the current file position."); + +static PyObject * +stringio_tell(stringio *self) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + return PyLong_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(stringio_read_doc, + "Read at most n characters, returned as a string.\n" + "\n" + "If the argument is negative or omitted, read until EOF\n" + "is reached. Return an empty string at EOF.\n"); + +static PyObject * +stringio_read(stringio *self, PyObject *args) +{ + Py_ssize_t size, n; + Py_UNICODE *output; + PyObject *arg = Py_None; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:read", &arg)) + return NULL; + CHECK_CLOSED(self); + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* Read until EOF is reached, by default. 
*/ + size = -1; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + output = self->buf + self->pos; + self->pos += size; + return PyUnicode_FromUnicode(output, size); +} + +/* Internal helper, used by stringio_readline and stringio_iternext */ +static PyObject * +_stringio_readline(stringio *self, Py_ssize_t limit) +{ + Py_UNICODE *start, *end, old_char; + Py_ssize_t len, consumed; + + /* In case of overseek, return the empty string */ + if (self->pos >= self->string_size) + return PyUnicode_FromString(""); + + start = self->buf + self->pos; + if (limit < 0 || limit > self->string_size - self->pos) + limit = self->string_size - self->pos; + + end = start + limit; + old_char = *end; + *end = '\0'; + len = _PyIO_find_line_ending( + self->readtranslate, self->readuniversal, self->readnl, + start, end, &consumed); + *end = old_char; + /* If we haven't found any line ending, we just return everything + (`consumed` is ignored). 
*/ + if (len < 0) + len = limit; + self->pos += len; + return PyUnicode_FromUnicode(start, len); +} + +PyDoc_STRVAR(stringio_readline_doc, + "Read until newline or EOF.\n" + "\n" + "Returns an empty string if EOF is hit immediately.\n"); + +static PyObject * +stringio_readline(stringio *self, PyObject *args) +{ + PyObject *arg = Py_None; + Py_ssize_t limit = -1; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:readline", &arg)) + return NULL; + CHECK_CLOSED(self); + + if (PyNumber_Check(arg)) { + limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (limit == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg != Py_None) { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + return _stringio_readline(self, limit); +} + +static PyObject * +stringio_iternext(stringio *self) +{ + PyObject *line; + + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + if (Py_TYPE(self) == &PyStringIO_Type) { + /* Skip method call overhead for speed */ + line = _stringio_readline(self, -1); + } + else { + /* XXX is subclassing StringIO really supported? */ + line = PyObject_CallMethodObjArgs((PyObject *)self, + _PyIO_str_readline, NULL); + if (line && !PyUnicode_Check(line)) { + PyErr_Format(PyExc_IOError, + "readline() should have returned an str object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyUnicode_GET_SIZE(line) == 0) { + /* Reached EOF */ + Py_DECREF(line); + return NULL; + } + + return line; +} + +PyDoc_STRVAR(stringio_truncate_doc, + "Truncate size to pos.\n" + "\n" + "The pos argument defaults to the current file position, as\n" + "returned by tell(). 
Imply an absolute seek to pos.\n" + "Returns the new absolute position.\n"); + +static PyObject * +stringio_truncate(stringio *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *arg = Py_None; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) + return NULL; + CHECK_CLOSED(self); + + if (PyNumber_Check(arg)) { + size = PyNumber_AsSsize_t(arg, PyExc_OverflowError); + if (size == -1 && PyErr_Occurred()) + return NULL; + } + else if (arg == Py_None) { + /* Truncate to current position if no argument is passed. */ + size = self->pos; + } + else { + PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", + Py_TYPE(arg)->tp_name); + return NULL; + } + + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "Negative size value %zd", size); + return NULL; + } + + if (size < self->string_size) { + if (resize_buffer(self, size) < 0) + return NULL; + self->string_size = size; + } + self->pos = size; + + return PyLong_FromSsize_t(size); +} + +PyDoc_STRVAR(stringio_seek_doc, + "Change stream position.\n" + "\n" + "Seek to character offset pos relative to position indicated by whence:\n" + " 0 Start of stream (the default). 
pos should be >= 0;\n" + " 1 Current position - pos must be 0;\n" + " 2 End of stream - pos must be 0.\n" + "Returns the new absolute position.\n"); + +static PyObject * +stringio_seek(stringio *self, PyObject *args) +{ + PyObject *posobj; + Py_ssize_t pos; + int mode = 0; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode)) + return NULL; + + pos = PyNumber_AsSsize_t(posobj, PyExc_OverflowError); + if (pos == -1 && PyErr_Occurred()) + return NULL; + + CHECK_CLOSED(self); + + if (mode != 0 && mode != 1 && mode != 2) { + PyErr_Format(PyExc_ValueError, + "Invalid whence (%i, should be 0, 1 or 2)", mode); + return NULL; + } + else if (pos < 0 && mode == 0) { + PyErr_Format(PyExc_ValueError, + "Negative seek position %zd", pos); + return NULL; + } + else if (mode != 0 && pos != 0) { + PyErr_SetString(PyExc_IOError, + "Can't do nonzero cur-relative seeks"); + return NULL; + } + + /* mode 0: offset relative to beginning of the string. + mode 1: no change to current position. + mode 2: change position to end of file. */ + if (mode == 1) { + pos = self->pos; + } + else if (mode == 2) { + pos = self->string_size; + } + + self->pos = pos; + + return PyLong_FromSsize_t(self->pos); +} + +PyDoc_STRVAR(stringio_write_doc, + "Write string to file.\n" + "\n" + "Returns the number of characters written, which is always equal to\n" + "the length of the string.\n"); + +static PyObject * +stringio_write(stringio *self, PyObject *obj) +{ + Py_ssize_t size; + + CHECK_INITIALIZED(self); + if (!PyUnicode_Check(obj)) { + PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", + Py_TYPE(obj)->tp_name); + return NULL; + } + CHECK_CLOSED(self); + size = PyUnicode_GET_SIZE(obj); + + if (size > 0 && write_str(self, obj) < 0) + return NULL; + + return PyLong_FromSsize_t(size); +} + +PyDoc_STRVAR(stringio_close_doc, + "Close the IO object. 
Attempting any further operation after the\n" + "object is closed will raise a ValueError.\n" + "\n" + "This method has no effect if the file is already closed.\n"); + +static PyObject * +stringio_close(stringio *self) +{ + self->closed = 1; + /* Free up some memory */ + if (resize_buffer(self, 0) < 0) + return NULL; + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + Py_RETURN_NONE; +} + +static int +stringio_traverse(stringio *self, visitproc visit, void *arg) +{ + Py_VISIT(self->dict); + return 0; +} + +static int +stringio_clear(stringio *self) +{ + Py_CLEAR(self->dict); + return 0; +} + +/* Release every reference and buffer owned by the object, then free it. */ +static void +stringio_dealloc(stringio *self) +{ + _PyObject_GC_UNTRACK(self); + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + /* The instance dict is owned by the object as well; without this it + leaks whenever the object is freed by plain reference counting. */ + Py_CLEAR(self->dict); + if (self->buf) + PyMem_Free(self->buf); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); + Py_TYPE(self)->tp_free(self); +} + +static PyObject * +stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + stringio *self; + + assert(type != NULL && type->tp_alloc != NULL); + self = (stringio *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->string_size = 0; + self->pos = 0; + self->buf_size = 0; + self->buf = (Py_UNICODE *)PyMem_Malloc(0); + if (self->buf == NULL) { + Py_DECREF(self); + return PyErr_NoMemory(); + } + + return (PyObject *)self; +} + +static int +stringio_init(stringio *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"initial_value", "newline", NULL}; + PyObject *value = NULL; + char *newline = "\n"; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oz:__init__", kwlist, + &value, &newline)) + return -1; + + if (newline && newline[0] != '\0' + && !(newline[0] == '\n' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { + PyErr_Format(PyExc_ValueError, + "illegal newline value: %s", newline); + return -1; + } +
if (value && value != Py_None && !PyUnicode_Check(value)) { + PyErr_Format(PyExc_ValueError, + "initial_value must be str or None, not %.200s", + Py_TYPE(value)->tp_name); + return -1; + } + + /* Mark the object uninitialized until every step below has succeeded */ + self->ok = 0; + + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + + if (newline) { + self->readnl = PyString_FromString(newline); + if (self->readnl == NULL) + return -1; + } + self->readuniversal = (newline == NULL || newline[0] == '\0'); + self->readtranslate = (newline == NULL); + /* If newline == "", we don't translate anything. + If newline == "\n" or newline == None, we translate to "\n", which is + a no-op. + (for newline == None, TextIOWrapper translates to os.linesep, but it + is pointless for StringIO) + */ + if (newline != NULL && newline[0] == '\r') { + self->writenl = PyUnicode_FromString(newline); + /* Creation can fail; report it instead of returning success with + an exception set and no write translation. */ + if (self->writenl == NULL) + return -1; + } + + if (self->readuniversal) { + self->decoder = PyObject_CallFunction( + (PyObject *)&PyIncrementalNewlineDecoder_Type, + "Oi", Py_None, (int) self->readtranslate); + if (self->decoder == NULL) + return -1; + } + + /* Now everything is set up, resize buffer to size of initial value, + and copy it */ + self->string_size = 0; + if (value && value != Py_None) { + Py_ssize_t len = PyUnicode_GetSize(value); + /* This is a heuristic, for newline translation might change + the string length.
*/ + if (resize_buffer(self, len) < 0) + return -1; + self->pos = 0; + if (write_str(self, value) < 0) + return -1; + } + else { + if (resize_buffer(self, 0) < 0) + return -1; + } + self->pos = 0; + + self->closed = 0; + self->ok = 1; + return 0; +} + +/* Properties and pseudo-properties */ +static PyObject * +stringio_seekable(stringio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + Py_RETURN_TRUE; +} + +static PyObject * +stringio_readable(stringio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + Py_RETURN_TRUE; +} + +static PyObject * +stringio_writable(stringio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + Py_RETURN_TRUE; +} + +static PyObject * +stringio_buffer(stringio *self, void *context) +{ + PyErr_SetString(_PyIO_unsupported_operation, + "buffer attribute is unsupported on type StringIO"); + return NULL; +} + +static PyObject * +stringio_closed(stringio *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyBool_FromLong(self->closed); +} + +static PyObject * +stringio_line_buffering(stringio *self, void *context) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + Py_RETURN_FALSE; +} + +static PyObject * +stringio_newlines(stringio *self, void *context) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + if (self->decoder == NULL) + Py_RETURN_NONE; + return PyObject_GetAttr(self->decoder, _PyIO_str_newlines); +} + +static struct PyMethodDef stringio_methods[] = { + {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc}, + {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc}, + {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc}, + {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc}, + {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc}, + {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc}, + {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc}, + {"write", 
(PyCFunction)stringio_write, METH_O, stringio_write_doc}, + + {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS}, + {"readable", (PyCFunction)stringio_readable, METH_NOARGS}, + {"writable", (PyCFunction)stringio_writable, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyGetSetDef stringio_getset[] = { + {"closed", (getter)stringio_closed, NULL, NULL}, + {"newlines", (getter)stringio_newlines, NULL, NULL}, + /* (following comments straight off of the original Python wrapper:) + XXX Cruft to support the TextIOWrapper API. This would only + be meaningful if StringIO supported the buffer attribute. + Hopefully, a better solution, than adding these pseudo-attributes, + will be found. + */ + {"buffer", (getter)stringio_buffer, NULL, NULL}, + {"line_buffering", (getter)stringio_line_buffering, NULL, NULL}, + {NULL} +}; + +PyTypeObject PyStringIO_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.StringIO", /*tp_name*/ + sizeof(stringio), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)stringio_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + stringio_doc, /*tp_doc*/ + (traverseproc)stringio_traverse, /*tp_traverse*/ + (inquiry)stringio_clear, /*tp_clear*/ + 0, /*tp_richcompare*/ + offsetof(stringio, weakreflist), /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + (iternextfunc)stringio_iternext, /*tp_iternext*/ + stringio_methods, /*tp_methods*/ + 0, /*tp_members*/ + stringio_getset, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + offsetof(stringio, dict), /*tp_dictoffset*/ + (initproc)stringio_init, /*tp_init*/ + 0, /*tp_alloc*/ + stringio_new, /*tp_new*/ +}; diff --git 
a/Modules/_io/textio.c b/Modules/_io/textio.c new file mode 100644 index 0000000..c129303 --- /dev/null +++ b/Modules/_io/textio.c @@ -0,0 +1,2606 @@ +/* + An implementation of Text I/O as defined by PEP 3116 - "New I/O" + + Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. + + Written by Amaury Forgeot d'Arc and Antoine Pitrou +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include "_iomodule.h" + +/* TextIOBase */ + +PyDoc_STRVAR(textiobase_doc, + "Base class for text I/O.\n" + "\n" + "This class provides a character and line based interface to stream\n" + "I/O. There is no readinto method because Python's character strings\n" + "are immutable. There is no public constructor.\n" + ); + +static PyObject * +_unsupported(const char *message) +{ + PyErr_SetString(_PyIO_unsupported_operation, message); + return NULL; +} + +PyDoc_STRVAR(textiobase_detach_doc, + "Separate the underlying buffer from the TextIOBase and return it.\n" + "\n" + "After the underlying buffer has been detached, the TextIO is in an\n" + "unusable state.\n" + ); + +static PyObject * +textiobase_detach(PyObject *self) +{ + return _unsupported("detach"); +} + +PyDoc_STRVAR(textiobase_read_doc, + "Read at most n characters from stream.\n" + "\n" + "Read from underlying buffer until we have n characters or we hit EOF.\n" + "If n is negative or omitted, read until EOF.\n" + ); + +static PyObject * +textiobase_read(PyObject *self, PyObject *args) +{ + return _unsupported("read"); +} + +PyDoc_STRVAR(textiobase_readline_doc, + "Read until newline or EOF.\n" + "\n" + "Returns an empty string if EOF is hit immediately.\n" + ); + +static PyObject * +textiobase_readline(PyObject *self, PyObject *args) +{ + return _unsupported("readline"); +} + +PyDoc_STRVAR(textiobase_write_doc, + "Write string to stream.\n" + "Returns the number of characters written (which is always equal to\n" + "the length of the string).\n" + ); + +static PyObject * 
+textiobase_write(PyObject *self, PyObject *args) +{ + return _unsupported("write"); +} + +PyDoc_STRVAR(textiobase_encoding_doc, + "Encoding of the text stream.\n" + "\n" + "Subclasses should override.\n" + ); + +static PyObject * +textiobase_encoding_get(PyObject *self, void *context) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(textiobase_newlines_doc, + "Line endings translated so far.\n" + "\n" + "Only line endings translated during reading are considered.\n" + "\n" + "Subclasses should override.\n" + ); + +static PyObject * +textiobase_newlines_get(PyObject *self, void *context) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(textiobase_errors_doc, + "The error setting of the decoder or encoder.\n" + "\n" + "Subclasses should override.\n" + ); + +static PyObject * +textiobase_errors_get(PyObject *self, void *context) +{ + Py_RETURN_NONE; +} + + +static PyMethodDef textiobase_methods[] = { + {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc}, + {"read", textiobase_read, METH_VARARGS, textiobase_read_doc}, + {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc}, + {"write", textiobase_write, METH_VARARGS, textiobase_write_doc}, + {NULL, NULL} +}; + +static PyGetSetDef textiobase_getset[] = { + {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc}, + {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc}, + {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc}, + {NULL} +}; + +PyTypeObject PyTextIOBase_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._TextIOBase", /*tp_name*/ + 0, /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ 
+ textiobase_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + textiobase_methods, /* tp_methods */ + 0, /* tp_members */ + textiobase_getset, /* tp_getset */ + &PyIOBase_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + + +/* IncrementalNewlineDecoder */ + +PyDoc_STRVAR(incrementalnewlinedecoder_doc, + "Codec used when reading a file in universal newlines mode. It wraps\n" + "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n" + "records the types of newlines encountered. When used with\n" + "translate=False, it ensures that the newline sequence is returned in\n" + "one piece. When used with decoder=None, it expects unicode strings as\n" + "decode input and translates newlines without first invoking an external\n" + "decoder.\n" + ); + +typedef struct { + PyObject_HEAD + PyObject *decoder; + PyObject *errors; + int pendingcr:1; + int translate:1; + unsigned int seennl:3; +} nldecoder_object; + +static int +incrementalnewlinedecoder_init(nldecoder_object *self, + PyObject *args, PyObject *kwds) +{ + PyObject *decoder; + int translate; + PyObject *errors = NULL; + char *kwlist[] = {"decoder", "translate", "errors", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder", + kwlist, &decoder, &translate, &errors)) + return -1; + + self->decoder = decoder; + Py_INCREF(decoder); + + if (errors == NULL) { + self->errors = PyUnicode_FromString("strict"); + if (self->errors == NULL) + return -1; + } + else { + Py_INCREF(errors); + self->errors = errors; + } + + self->translate = translate; + self->seennl = 0; + self->pendingcr = 0; + + return 0; +} + +static void +incrementalnewlinedecoder_dealloc(nldecoder_object *self) +{ + Py_CLEAR(self->decoder); + Py_CLEAR(self->errors); + 
Py_TYPE(self)->tp_free((PyObject *)self); +} + +#define SEEN_CR 1 +#define SEEN_LF 2 +#define SEEN_CRLF 4 +#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF) + +PyObject * +_PyIncrementalNewlineDecoder_decode(PyObject *_self, + PyObject *input, int final) +{ + PyObject *output; + Py_ssize_t output_len; + nldecoder_object *self = (nldecoder_object *) _self; + + if (self->decoder == NULL) { + PyErr_SetString(PyExc_ValueError, + "IncrementalNewlineDecoder.__init__ not called"); + return NULL; + } + + /* decode input (with the eventual \r from a previous pass) */ + if (self->decoder != Py_None) { + output = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_decode, input, final ? Py_True : Py_False, NULL); + } + else { + output = input; + Py_INCREF(output); + } + + if (output == NULL) + return NULL; + + if (!PyUnicode_Check(output)) { + PyErr_SetString(PyExc_TypeError, + "decoder should return a string result"); + goto error; + } + + output_len = PyUnicode_GET_SIZE(output); + if (self->pendingcr && (final || output_len > 0)) { + Py_UNICODE *out; + PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1); + if (modified == NULL) + goto error; + out = PyUnicode_AS_UNICODE(modified); + out[0] = '\r'; + memcpy(out + 1, PyUnicode_AS_UNICODE(output), + output_len * sizeof(Py_UNICODE)); + Py_DECREF(output); + output = modified; + self->pendingcr = 0; + output_len++; + } + + /* retain last \r even when not translating data: + * then readline() is sure to get \r\n in one pass + */ + if (!final) { + if (output_len > 0 + && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') { + + if (Py_REFCNT(output) == 1) { + if (PyUnicode_Resize(&output, output_len - 1) < 0) + goto error; + } + else { + PyObject *modified = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(output), + output_len - 1); + if (modified == NULL) + goto error; + Py_DECREF(output); + output = modified; + } + self->pendingcr = 1; + } + } + + /* Record which newlines are read and do newline translation if 
desired, + all in one pass. */ + { + Py_UNICODE *in_str; + Py_ssize_t len; + int seennl = self->seennl; + int only_lf = 0; + + in_str = PyUnicode_AS_UNICODE(output); + len = PyUnicode_GET_SIZE(output); + + if (len == 0) + return output; + + /* If, up to now, newlines are consistently \n, do a quick check + for the \r *byte* with the libc's optimized memchr. + */ + if (seennl == SEEN_LF || seennl == 0) { + only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL); + } + + if (only_lf) { + /* If not already seen, quick scan for a possible "\n" character. + (there's nothing else to be done, even when in translation mode) + */ + if (seennl == 0 && + memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) { + Py_UNICODE *s, *end; + s = in_str; + end = in_str + len; + for (;;) { + Py_UNICODE c; + /* Fast loop for non-control characters */ + while (*s > '\n') + s++; + c = *s++; + if (c == '\n') { + seennl |= SEEN_LF; + break; + } + if (s > end) + break; + } + } + /* Finished: we have scanned for newlines, and none of them + need translating */ + } + else if (!self->translate) { + Py_UNICODE *s, *end; + /* We have already seen all newline types, no need to scan again */ + if (seennl == SEEN_ALL) + goto endscan; + s = in_str; + end = in_str + len; + for (;;) { + Py_UNICODE c; + /* Fast loop for non-control characters */ + while (*s > '\r') + s++; + c = *s++; + if (c == '\n') + seennl |= SEEN_LF; + else if (c == '\r') { + if (*s == '\n') { + seennl |= SEEN_CRLF; + s++; + } + else + seennl |= SEEN_CR; + } + if (s > end) + break; + if (seennl == SEEN_ALL) + break; + } + endscan: + ; + } + else { + PyObject *translated = NULL; + Py_UNICODE *out_str; + Py_UNICODE *in, *out, *end; + if (Py_REFCNT(output) != 1) { + /* We could try to optimize this so that we only do a copy + when there is something to translate. On the other hand, + most decoders should only output non-shared strings, i.e. + translation is done in place. 
*/ + translated = PyUnicode_FromUnicode(NULL, len); + if (translated == NULL) + goto error; + assert(Py_REFCNT(translated) == 1); + memcpy(PyUnicode_AS_UNICODE(translated), + PyUnicode_AS_UNICODE(output), + len * sizeof(Py_UNICODE)); + } + else { + translated = output; + } + out_str = PyUnicode_AS_UNICODE(translated); + in = in_str; + out = out_str; + end = in_str + len; + for (;;) { + Py_UNICODE c; + /* Fast loop for non-control characters */ + while ((c = *in++) > '\r') + *out++ = c; + if (c == '\n') { + *out++ = c; + seennl |= SEEN_LF; + continue; + } + if (c == '\r') { + if (*in == '\n') { + in++; + seennl |= SEEN_CRLF; + } + else + seennl |= SEEN_CR; + *out++ = '\n'; + continue; + } + if (in > end) + break; + *out++ = c; + } + if (translated != output) { + Py_DECREF(output); + output = translated; + } + if (out - out_str != len) { + if (PyUnicode_Resize(&output, out - out_str) < 0) + goto error; + } + } + self->seennl |= seennl; + } + + return output; + + error: + Py_DECREF(output); + return NULL; +} + +static PyObject * +incrementalnewlinedecoder_decode(nldecoder_object *self, + PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"input", "final", NULL}; + PyObject *input; + int final = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder", + kwlist, &input, &final)) + return NULL; + return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final); +} + +static PyObject * +incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args) +{ + PyObject *buffer; + unsigned PY_LONG_LONG flag; + + if (self->decoder != Py_None) { + PyObject *state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (state == NULL) + return NULL; + if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) { + Py_DECREF(state); + return NULL; + } + Py_INCREF(buffer); + Py_DECREF(state); + } + else { + buffer = PyBytes_FromString(""); + flag = 0; + } + flag <<= 1; + if (self->pendingcr) + flag |= 1; + return 
Py_BuildValue("NK", buffer, flag); +} + +static PyObject * +incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state) +{ + PyObject *buffer; + unsigned PY_LONG_LONG flag; + + if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) + return NULL; + + self->pendingcr = (int) flag & 1; + flag >>= 1; + + if (self->decoder != Py_None) + return PyObject_CallMethod(self->decoder, + "setstate", "((OK))", buffer, flag); + else + Py_RETURN_NONE; +} + +static PyObject * +incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args) +{ + self->seennl = 0; + self->pendingcr = 0; + if (self->decoder != Py_None) + return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); + else + Py_RETURN_NONE; +} + +static PyObject * +incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context) +{ + switch (self->seennl) { + case SEEN_CR: + return PyUnicode_FromString("\r"); + case SEEN_LF: + return PyUnicode_FromString("\n"); + case SEEN_CRLF: + return PyUnicode_FromString("\r\n"); + case SEEN_CR | SEEN_LF: + return Py_BuildValue("ss", "\r", "\n"); + case SEEN_CR | SEEN_CRLF: + return Py_BuildValue("ss", "\r", "\r\n"); + case SEEN_LF | SEEN_CRLF: + return Py_BuildValue("ss", "\n", "\r\n"); + case SEEN_CR | SEEN_LF | SEEN_CRLF: + return Py_BuildValue("sss", "\r", "\n", "\r\n"); + default: + Py_RETURN_NONE; + } + +} + + +static PyMethodDef incrementalnewlinedecoder_methods[] = { + {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS}, + {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS}, + {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O}, + {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS}, + {NULL} +}; + +static PyGetSetDef incrementalnewlinedecoder_getset[] = { + {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL}, + {NULL} +}; + +PyTypeObject PyIncrementalNewlineDecoder_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + 
"_io.IncrementalNewlineDecoder", /*tp_name*/ + sizeof(nldecoder_object), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare */ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + incrementalnewlinedecoder_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /*tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + incrementalnewlinedecoder_methods, /* tp_methods */ + 0, /* tp_members */ + incrementalnewlinedecoder_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)incrementalnewlinedecoder_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* TextIOWrapper */ + +PyDoc_STRVAR(textiowrapper_doc, + "Character and line based layer over a BufferedIOBase object, buffer.\n" + "\n" + "encoding gives the name of the encoding that the stream will be\n" + "decoded or encoded with. It defaults to locale.getpreferredencoding.\n" + "\n" + "errors determines the strictness of encoding and decoding (see the\n" + "codecs.register) and defaults to \"strict\".\n" + "\n" + "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n" + "handling of line endings. If it is None, universal newlines is\n" + "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n" + "or '\\r\\n' are translated to '\\n' before being returned to the\n" + "caller. Conversely, on output, '\\n' is translated to the system\n" + "default line seperator, os.linesep. 
If newline is any other of its\n" + "legal values, that newline becomes the newline when the file is read\n" + "and it is returned untranslated. On output, '\\n' is converted to the\n" + "newline.\n" + "\n" + "If line_buffering is True, a call to flush is implied when a call to\n" + "write contains a newline character." + ); + +typedef PyObject * + (*encodefunc_t)(PyObject *, PyObject *); + +typedef struct +{ + PyObject_HEAD + int ok; /* initialized? */ + int detached; + Py_ssize_t chunk_size; + PyObject *buffer; + PyObject *encoding; + PyObject *encoder; + PyObject *decoder; + PyObject *readnl; + PyObject *errors; + const char *writenl; /* utf-8 encoded, NULL stands for \n */ + char line_buffering; + char readuniversal; + char readtranslate; + char writetranslate; + char seekable; + char telling; + /* Specialized encoding func (see below) */ + encodefunc_t encodefunc; + /* Whether or not it's the start of the stream */ + char encoding_start_of_stream; + + /* Reads and writes are internally buffered in order to speed things up. + However, any read will first flush the write buffer if itsn't empty. + + Please also note that text to be written is first encoded before being + buffered. This is necessary so that encoding errors are immediately + reported to the caller, but it unfortunately means that the + IncrementalEncoder (whose encode() method is always written in Python) + becomes a bottleneck for small writes. + */ + PyObject *decoded_chars; /* buffer for text returned from decoder */ + Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */ + PyObject *pending_bytes; /* list of bytes objects waiting to be + written, or NULL */ + Py_ssize_t pending_bytes_count; + PyObject *snapshot; + /* snapshot is either None, or a tuple (dec_flags, next_input) where + * dec_flags is the second (integer) item of the decoder state and + * next_input is the chunk of input bytes that comes next after the + * snapshot point. 
We use this to reconstruct decoder states in tell(). + */ + + /* Cache raw object if it's a FileIO object */ + PyObject *raw; + + PyObject *weakreflist; + PyObject *dict; +} textio; + + +/* A couple of specialized cases in order to bypass the slow incremental + encoding methods for the most popular encodings. */ + +static PyObject * +ascii_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors)); +} + +static PyObject * +utf16be_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 1); +} + +static PyObject * +utf16le_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), -1); +} + +static PyObject * +utf16_encode(textio *self, PyObject *text) +{ + if (!self->encoding_start_of_stream) { + /* Skip the BOM and use native byte ordering */ +#if defined(WORDS_BIGENDIAN) + return utf16be_encode(self, text); +#else + return utf16le_encode(self, text); +#endif + } + return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 0); +} + +static PyObject * +utf32be_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 1); +} + +static PyObject * +utf32le_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), -1); +} + +static PyObject * +utf32_encode(textio *self, PyObject *text) +{ + if (!self->encoding_start_of_stream) { + /* Skip the BOM and use native byte ordering */ +#if defined(WORDS_BIGENDIAN) + return utf32be_encode(self, text); +#else + return utf32le_encode(self, text); +#endif + } 
+ return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors), 0); +} + +static PyObject * +utf8_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors)); +} + +static PyObject * +latin1_encode(textio *self, PyObject *text) +{ + return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), + PyBytes_AS_STRING(self->errors)); +} + +/* Map normalized encoding names onto the specialized encoding funcs */ + +typedef struct { + const char *name; + encodefunc_t encodefunc; +} encodefuncentry; + +static encodefuncentry encodefuncs[] = { + {"ascii", (encodefunc_t) ascii_encode}, + {"iso8859-1", (encodefunc_t) latin1_encode}, + {"utf-8", (encodefunc_t) utf8_encode}, + {"utf-16-be", (encodefunc_t) utf16be_encode}, + {"utf-16-le", (encodefunc_t) utf16le_encode}, + {"utf-16", (encodefunc_t) utf16_encode}, + {"utf-32-be", (encodefunc_t) utf32be_encode}, + {"utf-32-le", (encodefunc_t) utf32le_encode}, + {"utf-32", (encodefunc_t) utf32_encode}, + {NULL, NULL} +}; + + +static int +textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) +{ + char *kwlist[] = {"buffer", "encoding", "errors", + "newline", "line_buffering", + NULL}; + PyObject *buffer, *raw; + char *encoding = NULL; + char *errors = NULL; + char *newline = NULL; + int line_buffering = 0; + + PyObject *res; + int r; + + self->ok = 0; + self->detached = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio", + kwlist, &buffer, &encoding, &errors, + &newline, &line_buffering)) + return -1; + + if (newline && newline[0] != '\0' + && !(newline[0] == '\n' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { + PyErr_Format(PyExc_ValueError, + "illegal newline value: %s", newline); + return -1; + } + + Py_CLEAR(self->buffer); + 
Py_CLEAR(self->encoding); + Py_CLEAR(self->encoder); + Py_CLEAR(self->decoder); + Py_CLEAR(self->readnl); + Py_CLEAR(self->decoded_chars); + Py_CLEAR(self->pending_bytes); + Py_CLEAR(self->snapshot); + Py_CLEAR(self->errors); + Py_CLEAR(self->raw); + self->decoded_chars_used = 0; + self->pending_bytes_count = 0; + self->encodefunc = NULL; + self->writenl = NULL; + + if (encoding == NULL && self->encoding == NULL) { + if (_PyIO_locale_module == NULL) { + _PyIO_locale_module = PyImport_ImportModule("locale"); + if (_PyIO_locale_module == NULL) + goto catch_ImportError; + else + goto use_locale; + } + else { + use_locale: + self->encoding = PyObject_CallMethod( + _PyIO_locale_module, "getpreferredencoding", NULL); + if (self->encoding == NULL) { + catch_ImportError: + /* + Importing locale can raise a ImportError because of + _functools, and locale.getpreferredencoding can raise a + ImportError if _locale is not available. These will happen + during module building. + */ + if (PyErr_ExceptionMatches(PyExc_ImportError)) { + PyErr_Clear(); + self->encoding = PyString_FromString("ascii"); + } + else + goto error; + } + else if (!PyString_Check(self->encoding)) + Py_CLEAR(self->encoding); + } + } + if (self->encoding != NULL) + encoding = PyString_AsString(self->encoding); + else if (encoding != NULL) { + self->encoding = PyString_FromString(encoding); + if (self->encoding == NULL) + goto error; + } + else { + PyErr_SetString(PyExc_IOError, + "could not determine default encoding"); + } + + if (errors == NULL) + errors = "strict"; + self->errors = PyBytes_FromString(errors); + if (self->errors == NULL) + goto error; + + self->chunk_size = 8192; + self->readuniversal = (newline == NULL || newline[0] == '\0'); + self->line_buffering = line_buffering; + self->readtranslate = (newline == NULL); + if (newline) { + self->readnl = PyString_FromString(newline); + if (self->readnl == NULL) + return -1; + } + self->writetranslate = (newline == NULL || newline[0] != '\0'); + if 
(!self->readuniversal && self->writetranslate) { + self->writenl = PyString_AsString(self->readnl); + if (!strcmp(self->writenl, "\n")) + self->writenl = NULL; + } +#ifdef MS_WINDOWS + else + self->writenl = "\r\n"; +#endif + + /* Build the decoder object */ + res = PyObject_CallMethod(buffer, "readable", NULL); + if (res == NULL) + goto error; + r = PyObject_IsTrue(res); + Py_DECREF(res); + if (r == -1) + goto error; + if (r == 1) { + self->decoder = PyCodec_IncrementalDecoder( + encoding, errors); + if (self->decoder == NULL) + goto error; + + if (self->readuniversal) { + PyObject *incrementalDecoder = PyObject_CallFunction( + (PyObject *)&PyIncrementalNewlineDecoder_Type, + "Oi", self->decoder, (int)self->readtranslate); + if (incrementalDecoder == NULL) + goto error; + Py_CLEAR(self->decoder); + self->decoder = incrementalDecoder; + } + } + + /* Build the encoder object */ + res = PyObject_CallMethod(buffer, "writable", NULL); + if (res == NULL) + goto error; + r = PyObject_IsTrue(res); + Py_DECREF(res); + if (r == -1) + goto error; + if (r == 1) { + PyObject *ci; + self->encoder = PyCodec_IncrementalEncoder( + encoding, errors); + if (self->encoder == NULL) + goto error; + /* Get the normalized named of the codec */ + ci = _PyCodec_Lookup(encoding); + if (ci == NULL) + goto error; + res = PyObject_GetAttrString(ci, "name"); + Py_DECREF(ci); + if (res == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + } + else if (PyString_Check(res)) { + encodefuncentry *e = encodefuncs; + while (e->name != NULL) { + if (!strcmp(PyString_AS_STRING(res), e->name)) { + self->encodefunc = e->encodefunc; + break; + } + e++; + } + } + Py_XDECREF(res); + } + + self->buffer = buffer; + Py_INCREF(buffer); + + if (Py_TYPE(buffer) == &PyBufferedReader_Type || + Py_TYPE(buffer) == &PyBufferedWriter_Type || + Py_TYPE(buffer) == &PyBufferedRandom_Type) { + raw = PyObject_GetAttrString(buffer, "raw"); + /* Cache the raw FileIO object to 
speed up 'closed' checks */ + if (raw == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + } + else if (Py_TYPE(raw) == &PyFileIO_Type) + self->raw = raw; + else + Py_DECREF(raw); + } + + res = PyObject_CallMethod(buffer, "seekable", NULL); + if (res == NULL) + goto error; + self->seekable = self->telling = PyObject_IsTrue(res); + Py_DECREF(res); + + self->encoding_start_of_stream = 0; + if (self->seekable && self->encoder) { + PyObject *cookieObj; + int cmp; + + self->encoding_start_of_stream = 1; + + cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL); + if (cookieObj == NULL) + goto error; + + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); + Py_DECREF(cookieObj); + if (cmp < 0) { + goto error; + } + + if (cmp == 0) { + self->encoding_start_of_stream = 0; + res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, + _PyIO_zero, NULL); + if (res == NULL) + goto error; + Py_DECREF(res); + } + } + + self->ok = 1; + return 0; + + error: + return -1; +} + +static int +_textiowrapper_clear(textio *self) +{ + if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) + return -1; + self->ok = 0; + Py_CLEAR(self->buffer); + Py_CLEAR(self->encoding); + Py_CLEAR(self->encoder); + Py_CLEAR(self->decoder); + Py_CLEAR(self->readnl); + Py_CLEAR(self->decoded_chars); + Py_CLEAR(self->pending_bytes); + Py_CLEAR(self->snapshot); + Py_CLEAR(self->errors); + Py_CLEAR(self->raw); + return 0; +} + +static void +textiowrapper_dealloc(textio *self) +{ + if (_textiowrapper_clear(self) < 0) + return; + _PyObject_GC_UNTRACK(self); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *)self); + Py_CLEAR(self->dict); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +textiowrapper_traverse(textio *self, visitproc visit, void *arg) +{ + Py_VISIT(self->buffer); + Py_VISIT(self->encoding); + Py_VISIT(self->encoder); + Py_VISIT(self->decoder); + Py_VISIT(self->readnl); + 
/* Guard macros: make the *calling* function return early with ValueError
   when the wrapper is not usable (self->ok <= 0).  The message says whether
   the buffer was detached or the object was never initialized.

   CHECK_INITIALIZED returns NULL (for functions returning PyObject *);
   CHECK_INITIALIZED_INT returns -1 (for functions returning int).

   Both are wrapped in do { ... } while (0) so that `CHECK_...(self);`
   expands to exactly one statement and cannot capture a following `else`;
   the argument is parenthesized so any pointer expression may be passed. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return NULL; \
        } \
    } while (0)

#define CHECK_INITIALIZED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return -1; \
        } \
    } while (0)
return buffer; +} + +Py_LOCAL_INLINE(const Py_UNICODE *) +findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch) +{ + /* like wcschr, but doesn't stop at NULL characters */ + while (size-- > 0) { + if (*s == ch) + return s; + s++; + } + return NULL; +} + +/* Flush the internal write buffer. This doesn't explicitly flush the + underlying buffered object, though. */ +static int +_textiowrapper_writeflush(textio *self) +{ + PyObject *b, *ret; + + if (self->pending_bytes == NULL) + return 0; + b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes); + if (b == NULL) + return -1; + ret = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_write, b, NULL); + Py_DECREF(b); + if (ret == NULL) + return -1; + Py_DECREF(ret); + Py_CLEAR(self->pending_bytes); + self->pending_bytes_count = 0; + return 0; +} + +static PyObject * +textiowrapper_write(textio *self, PyObject *args) +{ + PyObject *ret; + PyObject *text; /* owned reference */ + PyObject *b; + Py_ssize_t textlen; + int haslf = 0; + int needflush = 0; + + CHECK_INITIALIZED(self); + + if (!PyArg_ParseTuple(args, "U:write", &text)) { + return NULL; + } + + CHECK_CLOSED(self); + + if (self->encoder == NULL) { + PyErr_SetString(PyExc_IOError, "not writable"); + return NULL; + } + + Py_INCREF(text); + + textlen = PyUnicode_GetSize(text); + + if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) + if (findchar(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), '\n')) + haslf = 1; + + if (haslf && self->writetranslate && self->writenl != NULL) { + PyObject *newtext = PyObject_CallMethod( + text, "replace", "ss", "\n", self->writenl); + Py_DECREF(text); + if (newtext == NULL) + return NULL; + text = newtext; + } + + if (self->line_buffering && + (haslf || + findchar(PyUnicode_AS_UNICODE(text), + PyUnicode_GET_SIZE(text), '\r'))) + needflush = 1; + + /* XXX What if we were just reading? 
*/ + if (self->encodefunc != NULL) { + b = (*self->encodefunc)((PyObject *) self, text); + self->encoding_start_of_stream = 0; + } + else + b = PyObject_CallMethodObjArgs(self->encoder, + _PyIO_str_encode, text, NULL); + Py_DECREF(text); + if (b == NULL) + return NULL; + + if (self->pending_bytes == NULL) { + self->pending_bytes = PyList_New(0); + if (self->pending_bytes == NULL) { + Py_DECREF(b); + return NULL; + } + self->pending_bytes_count = 0; + } + if (PyList_Append(self->pending_bytes, b) < 0) { + Py_DECREF(b); + return NULL; + } + self->pending_bytes_count += PyBytes_GET_SIZE(b); + Py_DECREF(b); + if (self->pending_bytes_count > self->chunk_size || needflush) { + if (_textiowrapper_writeflush(self) < 0) + return NULL; + } + + if (needflush) { + ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL); + if (ret == NULL) + return NULL; + Py_DECREF(ret); + } + + Py_CLEAR(self->snapshot); + + if (self->decoder) { + ret = PyObject_CallMethod(self->decoder, "reset", NULL); + if (ret == NULL) + return NULL; + Py_DECREF(ret); + } + + return PyLong_FromSsize_t(textlen); +} + +/* Steal a reference to chars and store it in the decoded_char buffer; + */ +static void +textiowrapper_set_decoded_chars(textio *self, PyObject *chars) +{ + Py_CLEAR(self->decoded_chars); + self->decoded_chars = chars; + self->decoded_chars_used = 0; +} + +static PyObject * +textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) +{ + PyObject *chars; + Py_ssize_t avail; + + if (self->decoded_chars == NULL) + return PyUnicode_FromStringAndSize(NULL, 0); + + avail = (PyUnicode_GET_SIZE(self->decoded_chars) + - self->decoded_chars_used); + + assert(avail >= 0); + + if (n < 0 || n > avail) + n = avail; + + if (self->decoded_chars_used > 0 || n < avail) { + chars = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(self->decoded_chars) + + self->decoded_chars_used, n); + if (chars == NULL) + return NULL; + } + else { + chars = self->decoded_chars; + Py_INCREF(chars); + } + + 
self->decoded_chars_used += n; + return chars; +} + +/* Read and decode the next chunk of data from the BufferedReader. + */ +static int +textiowrapper_read_chunk(textio *self) +{ + PyObject *dec_buffer = NULL; + PyObject *dec_flags = NULL; + PyObject *input_chunk = NULL; + PyObject *decoded_chars, *chunk_size; + int eof; + + /* The return value is True unless EOF was reached. The decoded string is + * placed in self._decoded_chars (replacing its previous value). The + * entire input chunk is sent to the decoder, though some of it may remain + * buffered in the decoder, yet to be converted. + */ + + if (self->decoder == NULL) { + PyErr_SetString(PyExc_IOError, "not readable"); + return -1; + } + + if (self->telling) { + /* To prepare for tell(), we need to snapshot a point in the file + * where the decoder's input buffer is empty. + */ + + PyObject *state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (state == NULL) + return -1; + /* Given this, we know there was a valid snapshot point + * len(dec_buffer) bytes ago with decoder state (b'', dec_flags). + */ + if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) { + Py_DECREF(state); + return -1; + } + Py_INCREF(dec_buffer); + Py_INCREF(dec_flags); + Py_DECREF(state); + } + + /* Read a chunk, decode it, and put the result in self._decoded_chars. */ + chunk_size = PyLong_FromSsize_t(self->chunk_size); + if (chunk_size == NULL) + goto fail; + input_chunk = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_read1, chunk_size, NULL); + Py_DECREF(chunk_size); + if (input_chunk == NULL) + goto fail; + assert(PyBytes_Check(input_chunk)); + + eof = (PyBytes_Size(input_chunk) == 0); + + if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { + decoded_chars = _PyIncrementalNewlineDecoder_decode( + self->decoder, input_chunk, eof); + } + else { + decoded_chars = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_decode, input_chunk, eof ? 
Py_True : Py_False, NULL); + } + + /* TODO sanity check: isinstance(decoded_chars, unicode) */ + if (decoded_chars == NULL) + goto fail; + textiowrapper_set_decoded_chars(self, decoded_chars); + if (PyUnicode_GET_SIZE(decoded_chars) > 0) + eof = 0; + + if (self->telling) { + /* At the snapshot point, len(dec_buffer) bytes before the read, the + * next input to be decoded is dec_buffer + input_chunk. + */ + PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk); + if (next_input == NULL) + goto fail; + assert (PyBytes_Check(next_input)); + Py_DECREF(dec_buffer); + Py_CLEAR(self->snapshot); + self->snapshot = Py_BuildValue("NN", dec_flags, next_input); + } + Py_DECREF(input_chunk); + + return (eof == 0); + + fail: + Py_XDECREF(dec_buffer); + Py_XDECREF(dec_flags); + Py_XDECREF(input_chunk); + return -1; +} + +static PyObject * +textiowrapper_read(textio *self, PyObject *args) +{ + Py_ssize_t n = -1; + PyObject *result = NULL, *chunks = NULL; + + CHECK_INITIALIZED(self); + + if (!PyArg_ParseTuple(args, "|n:read", &n)) + return NULL; + + CHECK_CLOSED(self); + + if (self->decoder == NULL) { + PyErr_SetString(PyExc_IOError, "not readable"); + return NULL; + } + + if (_textiowrapper_writeflush(self) < 0) + return NULL; + + if (n < 0) { + /* Read everything */ + PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL); + PyObject *decoded, *final; + if (bytes == NULL) + goto fail; + decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, + bytes, Py_True, NULL); + Py_DECREF(bytes); + if (decoded == NULL) + goto fail; + + result = textiowrapper_get_decoded_chars(self, -1); + + if (result == NULL) { + Py_DECREF(decoded); + return NULL; + } + + final = PyUnicode_Concat(result, decoded); + Py_DECREF(result); + Py_DECREF(decoded); + if (final == NULL) + goto fail; + + Py_CLEAR(self->snapshot); + return final; + } + else { + int res = 1; + Py_ssize_t remaining = n; + + result = textiowrapper_get_decoded_chars(self, n); + if (result == NULL) + goto 
fail; + remaining -= PyUnicode_GET_SIZE(result); + + /* Keep reading chunks until we have n characters to return */ + while (remaining > 0) { + res = textiowrapper_read_chunk(self); + if (res < 0) + goto fail; + if (res == 0) /* EOF */ + break; + if (chunks == NULL) { + chunks = PyList_New(0); + if (chunks == NULL) + goto fail; + } + if (PyList_Append(chunks, result) < 0) + goto fail; + Py_DECREF(result); + result = textiowrapper_get_decoded_chars(self, remaining); + if (result == NULL) + goto fail; + remaining -= PyUnicode_GET_SIZE(result); + } + if (chunks != NULL) { + if (result != NULL && PyList_Append(chunks, result) < 0) + goto fail; + Py_CLEAR(result); + result = PyUnicode_Join(_PyIO_empty_str, chunks); + if (result == NULL) + goto fail; + Py_CLEAR(chunks); + } + return result; + } + fail: + Py_XDECREF(result); + Py_XDECREF(chunks); + return NULL; +} + + +/* NOTE: `end` must point to the real end of the Py_UNICODE storage, + that is to the NUL character. Otherwise the function will produce + incorrect results. */ +static Py_UNICODE * +find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch) +{ + Py_UNICODE *s = start; + for (;;) { + while (*s > ch) + s++; + if (*s == ch) + return s; + if (s == end) + return NULL; + s++; + } +} + +Py_ssize_t +_PyIO_find_line_ending( + int translated, int universal, PyObject *readnl, + Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed) +{ + Py_ssize_t len = end - start; + + if (translated) { + /* Newlines are already translated, only search for \n */ + Py_UNICODE *pos = find_control_char(start, end, '\n'); + if (pos != NULL) + return pos - start + 1; + else { + *consumed = len; + return -1; + } + } + else if (universal) { + /* Universal newline search. Find any of \r, \r\n, \n + * The decoder ensures that \r\n are not split in two pieces + */ + Py_UNICODE *s = start; + for (;;) { + Py_UNICODE ch; + /* Fast path for non-control chars. The loop always ends + since the Py_UNICODE storage is NUL-terminated. 
*/ + while (*s > '\r') + s++; + if (s >= end) { + *consumed = len; + return -1; + } + ch = *s++; + if (ch == '\n') + return s - start; + if (ch == '\r') { + if (*s == '\n') + return s - start + 1; + else + return s - start; + } + } + } + else { + /* Non-universal mode. */ + Py_ssize_t readnl_len = PyString_GET_SIZE(readnl); + unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl); + if (readnl_len == 1) { + Py_UNICODE *pos = find_control_char(start, end, nl[0]); + if (pos != NULL) + return pos - start + 1; + *consumed = len; + return -1; + } + else { + Py_UNICODE *s = start; + Py_UNICODE *e = end - readnl_len + 1; + Py_UNICODE *pos; + if (e < s) + e = s; + while (s < e) { + Py_ssize_t i; + Py_UNICODE *pos = find_control_char(s, end, nl[0]); + if (pos == NULL || pos >= e) + break; + for (i = 1; i < readnl_len; i++) { + if (pos[i] != nl[i]) + break; + } + if (i == readnl_len) + return pos - start + readnl_len; + s = pos + 1; + } + pos = find_control_char(e, end, nl[0]); + if (pos == NULL) + *consumed = len; + else + *consumed = pos - start; + return -1; + } + } +} + +static PyObject * +_textiowrapper_readline(textio *self, Py_ssize_t limit) +{ + PyObject *line = NULL, *chunks = NULL, *remaining = NULL; + Py_ssize_t start, endpos, chunked, offset_to_buffer; + int res; + + CHECK_CLOSED(self); + + if (_textiowrapper_writeflush(self) < 0) + return NULL; + + chunked = 0; + + while (1) { + Py_UNICODE *ptr; + Py_ssize_t line_len; + Py_ssize_t consumed = 0; + + /* First, get some data if necessary */ + res = 1; + while (!self->decoded_chars || + !PyUnicode_GET_SIZE(self->decoded_chars)) { + res = textiowrapper_read_chunk(self); + if (res < 0) + goto error; + if (res == 0) + break; + } + if (res == 0) { + /* end of file */ + textiowrapper_set_decoded_chars(self, NULL); + Py_CLEAR(self->snapshot); + start = endpos = offset_to_buffer = 0; + break; + } + + if (remaining == NULL) { + line = self->decoded_chars; + start = self->decoded_chars_used; + offset_to_buffer = 0; 
+ Py_INCREF(line); + } + else { + assert(self->decoded_chars_used == 0); + line = PyUnicode_Concat(remaining, self->decoded_chars); + start = 0; + offset_to_buffer = PyUnicode_GET_SIZE(remaining); + Py_CLEAR(remaining); + if (line == NULL) + goto error; + } + + ptr = PyUnicode_AS_UNICODE(line); + line_len = PyUnicode_GET_SIZE(line); + + endpos = _PyIO_find_line_ending( + self->readtranslate, self->readuniversal, self->readnl, + ptr + start, ptr + line_len, &consumed); + if (endpos >= 0) { + endpos += start; + if (limit >= 0 && (endpos - start) + chunked >= limit) + endpos = start + limit - chunked; + break; + } + + /* We can put aside up to `endpos` */ + endpos = consumed + start; + if (limit >= 0 && (endpos - start) + chunked >= limit) { + /* Didn't find line ending, but reached length limit */ + endpos = start + limit - chunked; + break; + } + + if (endpos > start) { + /* No line ending seen yet - put aside current data */ + PyObject *s; + if (chunks == NULL) { + chunks = PyList_New(0); + if (chunks == NULL) + goto error; + } + s = PyUnicode_FromUnicode(ptr + start, endpos - start); + if (s == NULL) + goto error; + if (PyList_Append(chunks, s) < 0) { + Py_DECREF(s); + goto error; + } + chunked += PyUnicode_GET_SIZE(s); + Py_DECREF(s); + } + /* There may be some remaining bytes we'll have to prepend to the + next chunk of data */ + if (endpos < line_len) { + remaining = PyUnicode_FromUnicode( + ptr + endpos, line_len - endpos); + if (remaining == NULL) + goto error; + } + Py_CLEAR(line); + /* We have consumed the buffer */ + textiowrapper_set_decoded_chars(self, NULL); + } + + if (line != NULL) { + /* Our line ends in the current buffer */ + self->decoded_chars_used = endpos - offset_to_buffer; + if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) { + if (start == 0 && Py_REFCNT(line) == 1) { + if (PyUnicode_Resize(&line, endpos) < 0) + goto error; + } + else { + PyObject *s = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(line) + start, endpos - start); + 
Py_CLEAR(line); + if (s == NULL) + goto error; + line = s; + } + } + } + if (remaining != NULL) { + if (chunks == NULL) { + chunks = PyList_New(0); + if (chunks == NULL) + goto error; + } + if (PyList_Append(chunks, remaining) < 0) + goto error; + Py_CLEAR(remaining); + } + if (chunks != NULL) { + if (line != NULL && PyList_Append(chunks, line) < 0) + goto error; + Py_CLEAR(line); + line = PyUnicode_Join(_PyIO_empty_str, chunks); + if (line == NULL) + goto error; + Py_DECREF(chunks); + } + if (line == NULL) + line = PyUnicode_FromStringAndSize(NULL, 0); + + return line; + + error: + Py_XDECREF(chunks); + Py_XDECREF(remaining); + Py_XDECREF(line); + return NULL; +} + +static PyObject * +textiowrapper_readline(textio *self, PyObject *args) +{ + PyObject *limitobj = NULL; + Py_ssize_t limit = -1; + + CHECK_INITIALIZED(self); + if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) { + return NULL; + } + if (limitobj) { + if (!PyNumber_Check(limitobj)) { + PyErr_Format(PyExc_TypeError, + "integer argument expected, got '%.200s'", + Py_TYPE(limitobj)->tp_name); + return NULL; + } + limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError); + if (limit == -1 && PyErr_Occurred()) + return NULL; + } + return _textiowrapper_readline(self, limit); +} + +/* Seek and Tell */ + +typedef struct { + Py_off_t start_pos; + int dec_flags; + int bytes_to_feed; + int chars_to_skip; + char need_eof; +} cookie_type; + +/* + To speed up cookie packing/unpacking, we store the fields in a temporary + string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.). + The following macros define at which offsets in the intermediary byte + string the various CookieStruct fields will be stored. 
+ */ + +#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char)) + +#if defined(WORDS_BIGENDIAN) + +# define IS_LITTLE_ENDIAN 0 + +/* We want the least significant byte of start_pos to also be the least + significant byte of the cookie, which means that in big-endian mode we + must copy the fields in reverse order. */ + +# define OFF_START_POS (sizeof(char) + 3 * sizeof(int)) +# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int)) +# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int)) +# define OFF_CHARS_TO_SKIP (sizeof(char)) +# define OFF_NEED_EOF 0 + +#else + +# define IS_LITTLE_ENDIAN 1 + +/* Little-endian mode: the least significant byte of start_pos will + naturally end up the least significant byte of the cookie. */ + +# define OFF_START_POS 0 +# define OFF_DEC_FLAGS (sizeof(Py_off_t)) +# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int)) +# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int)) +# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int)) + +#endif + +static int +textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) +{ + unsigned char buffer[COOKIE_BUF_LEN]; + PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj); + if (cookieLong == NULL) + return -1; + + if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), + IS_LITTLE_ENDIAN, 0) < 0) { + Py_DECREF(cookieLong); + return -1; + } + Py_DECREF(cookieLong); + + memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos)); + memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags)); + memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed)); + memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip)); + memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof)); + + return 0; +} + +static PyObject * +textiowrapper_build_cookie(cookie_type *cookie) +{ + unsigned char buffer[COOKIE_BUF_LEN]; + + memcpy(buffer + 
OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos)); + memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags)); + memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed)); + memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip)); + memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof)); + + return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0); +} +#undef IS_LITTLE_ENDIAN + +static int +_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) +{ + PyObject *res; + /* When seeking to the start of the stream, we call decoder.reset() + rather than decoder.getstate(). + This is for a few decoders such as utf-16 for which the state value + at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of + utf-16, that we are expecting a BOM). + */ + if (cookie->start_pos == 0 && cookie->dec_flags == 0) + res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); + else + res = PyObject_CallMethod(self->decoder, "setstate", + "((si))", "", cookie->dec_flags); + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; +} + +static int +_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) +{ + PyObject *res; + /* Same as _textiowrapper_decoder_setstate() above. 
*/ + if (cookie->start_pos == 0 && cookie->dec_flags == 0) { + res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL); + self->encoding_start_of_stream = 1; + } + else { + res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, + _PyIO_zero, NULL); + self->encoding_start_of_stream = 0; + } + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; +} + +static PyObject * +textiowrapper_seek(textio *self, PyObject *args) +{ + PyObject *cookieObj, *posobj; + cookie_type cookie; + int whence = 0; + PyObject *res; + int cmp; + + CHECK_INITIALIZED(self); + + if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence)) + return NULL; + CHECK_CLOSED(self); + + Py_INCREF(cookieObj); + + if (!self->seekable) { + PyErr_SetString(PyExc_IOError, + "underlying stream is not seekable"); + goto fail; + } + + if (whence == 1) { + /* seek relative to current position */ + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); + if (cmp < 0) + goto fail; + + if (cmp == 0) { + PyErr_SetString(PyExc_IOError, + "can't do nonzero cur-relative seeks"); + goto fail; + } + + /* Seeking to the current position should attempt to + * sync the underlying buffer with the current position. 
+ */ + Py_DECREF(cookieObj); + cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL); + if (cookieObj == NULL) + goto fail; + } + else if (whence == 2) { + /* seek relative to end of file */ + + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); + if (cmp < 0) + goto fail; + + if (cmp == 0) { + PyErr_SetString(PyExc_IOError, + "can't do nonzero end-relative seeks"); + goto fail; + } + + res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + + textiowrapper_set_decoded_chars(self, NULL); + Py_CLEAR(self->snapshot); + if (self->decoder) { + res = PyObject_CallMethod(self->decoder, "reset", NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + } + + res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2); + Py_XDECREF(cookieObj); + return res; + } + else if (whence != 0) { + PyErr_Format(PyExc_ValueError, + "invalid whence (%d, should be 0, 1 or 2)", whence); + goto fail; + } + + cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT); + if (cmp < 0) + goto fail; + + if (cmp == 1) { + PyObject *repr = PyObject_Repr(cookieObj); + if (repr != NULL) { + PyErr_Format(PyExc_ValueError, + "negative seek position %s", + PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + goto fail; + } + + res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + + /* The strategy of seek() is to go back to the safe start point + * and replay the effect of read(chars_to_skip) from there. + */ + if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0) + goto fail; + + /* Seek back to the safe start point. 
*/ + posobj = PyLong_FromOff_t(cookie.start_pos); + if (posobj == NULL) + goto fail; + res = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_seek, posobj, NULL); + Py_DECREF(posobj); + if (res == NULL) + goto fail; + Py_DECREF(res); + + textiowrapper_set_decoded_chars(self, NULL); + Py_CLEAR(self->snapshot); + + /* Restore the decoder to its state from the safe start point. */ + if (self->decoder) { + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + } + + if (cookie.chars_to_skip) { + /* Just like _read_chunk, feed the decoder and save a snapshot. */ + PyObject *input_chunk = PyObject_CallMethod( + self->buffer, "read", "i", cookie.bytes_to_feed); + PyObject *decoded; + + if (input_chunk == NULL) + goto fail; + + assert (PyBytes_Check(input_chunk)); + + self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); + if (self->snapshot == NULL) { + Py_DECREF(input_chunk); + goto fail; + } + + decoded = PyObject_CallMethod(self->decoder, "decode", + "Oi", input_chunk, (int)cookie.need_eof); + + if (decoded == NULL) + goto fail; + + textiowrapper_set_decoded_chars(self, decoded); + + /* Skip chars_to_skip of the decoded characters. 
*/ + if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) { + PyErr_SetString(PyExc_IOError, "can't restore logical file position"); + goto fail; + } + self->decoded_chars_used = cookie.chars_to_skip; + } + else { + self->snapshot = Py_BuildValue("is", cookie.dec_flags, ""); + if (self->snapshot == NULL) + goto fail; + } + + /* Finally, reset the encoder (merely useful for proper BOM handling) */ + if (self->encoder) { + if (_textiowrapper_encoder_setstate(self, &cookie) < 0) + goto fail; + } + return cookieObj; + fail: + Py_XDECREF(cookieObj); + return NULL; + +} + +static PyObject * +textiowrapper_tell(textio *self, PyObject *args) +{ + PyObject *res; + PyObject *posobj = NULL; + cookie_type cookie = {0,0,0,0,0}; + PyObject *next_input; + Py_ssize_t chars_to_skip, chars_decoded; + PyObject *saved_state = NULL; + char *input, *input_end; + + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + if (!self->seekable) { + PyErr_SetString(PyExc_IOError, + "underlying stream is not seekable"); + goto fail; + } + if (!self->telling) { + PyErr_SetString(PyExc_IOError, + "telling position disabled by next() call"); + goto fail; + } + + if (_textiowrapper_writeflush(self) < 0) + return NULL; + res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + if (res == NULL) + goto fail; + Py_DECREF(res); + + posobj = PyObject_CallMethod(self->buffer, "tell", NULL); + if (posobj == NULL) + goto fail; + + if (self->decoder == NULL || self->snapshot == NULL) { + assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0); + return posobj; + } + +#if defined(HAVE_LARGEFILE_SUPPORT) + cookie.start_pos = PyLong_AsLongLong(posobj); +#else + cookie.start_pos = PyLong_AsLong(posobj); +#endif + if (PyErr_Occurred()) + goto fail; + + /* Skip backward to the snapshot point (see _read_chunk). 
*/ + if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input)) + goto fail; + + assert (PyBytes_Check(next_input)); + + cookie.start_pos -= PyBytes_GET_SIZE(next_input); + + /* How many decoded characters have been used up since the snapshot? */ + if (self->decoded_chars_used == 0) { + /* We haven't moved from the snapshot point. */ + Py_DECREF(posobj); + return textiowrapper_build_cookie(&cookie); + } + + chars_to_skip = self->decoded_chars_used; + + /* Starting from the snapshot position, we will walk the decoder + * forward until it gives us enough decoded characters. + */ + saved_state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (saved_state == NULL) + goto fail; + + /* Note our initial start point. */ + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + + /* Feed the decoder one byte at a time. As we go, note the + * nearest "safe start point" before the current location + * (a point where the decoder has nothing buffered, so seek() + * can safely start from there and advance to this location). + */ + chars_decoded = 0; + input = PyBytes_AS_STRING(next_input); + input_end = input + PyBytes_GET_SIZE(next_input); + while (input < input_end) { + PyObject *state; + char *dec_buffer; + Py_ssize_t dec_buffer_len; + int dec_flags; + + PyObject *decoded = PyObject_CallMethod( + self->decoder, "decode", "s#", input, 1); + if (decoded == NULL) + goto fail; + assert (PyUnicode_Check(decoded)); + chars_decoded += PyUnicode_GET_SIZE(decoded); + Py_DECREF(decoded); + + cookie.bytes_to_feed += 1; + + state = PyObject_CallMethodObjArgs(self->decoder, + _PyIO_str_getstate, NULL); + if (state == NULL) + goto fail; + if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { + Py_DECREF(state); + goto fail; + } + Py_DECREF(state); + + if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { + /* Decoder buffer is empty, so this is a safe start point. 
*/ + cookie.start_pos += cookie.bytes_to_feed; + chars_to_skip -= chars_decoded; + cookie.dec_flags = dec_flags; + cookie.bytes_to_feed = 0; + chars_decoded = 0; + } + if (chars_decoded >= chars_to_skip) + break; + input++; + } + if (input == input_end) { + /* We didn't get enough decoded data; signal EOF to get more. */ + PyObject *decoded = PyObject_CallMethod( + self->decoder, "decode", "si", "", /* final = */ 1); + if (decoded == NULL) + goto fail; + assert (PyUnicode_Check(decoded)); + chars_decoded += PyUnicode_GET_SIZE(decoded); + Py_DECREF(decoded); + cookie.need_eof = 1; + + if (chars_decoded < chars_to_skip) { + PyErr_SetString(PyExc_IOError, + "can't reconstruct logical file position"); + goto fail; + } + } + + /* finally */ + Py_XDECREF(posobj); + res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); + Py_DECREF(saved_state); + if (res == NULL) + return NULL; + Py_DECREF(res); + + /* The returned cookie corresponds to the last safe start point. */ + cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); + return textiowrapper_build_cookie(&cookie); + + fail: + Py_XDECREF(posobj); + if (saved_state) { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); + Py_DECREF(saved_state); + if (res == NULL) + return NULL; + Py_DECREF(res); + + PyErr_Restore(type, value, traceback); + } + return NULL; +} + +static PyObject * +textiowrapper_truncate(textio *self, PyObject *args) +{ + PyObject *pos = Py_None; + PyObject *res; + + CHECK_INITIALIZED(self) + if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) { + return NULL; + } + + res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); + + if (pos != Py_None) { + res = PyObject_CallMethodObjArgs((PyObject *) self, + _PyIO_str_seek, pos, NULL); + if (res == NULL) + return NULL; + Py_DECREF(res); + } + + 
return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL); +} + +static PyObject * +textiowrapper_repr(textio *self) +{ + PyObject *nameobj, *res; + PyObject *namerepr = NULL, *encrepr = NULL; + + CHECK_INITIALIZED(self); + + nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + if (nameobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + encrepr = PyObject_Repr(self->encoding); + res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>", + PyString_AS_STRING(encrepr)); + } + else { + encrepr = PyObject_Repr(self->encoding); + namerepr = PyObject_Repr(nameobj); + res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>", + PyString_AS_STRING(namerepr), + PyString_AS_STRING(encrepr)); + Py_DECREF(nameobj); + } + Py_XDECREF(namerepr); + Py_XDECREF(encrepr); + return res; + +error: + Py_XDECREF(namerepr); + Py_XDECREF(encrepr); + return NULL; +} + + +/* Inquiries */ + +static PyObject * +textiowrapper_fileno(textio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "fileno", NULL); +} + +static PyObject * +textiowrapper_seekable(textio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "seekable", NULL); +} + +static PyObject * +textiowrapper_readable(textio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "readable", NULL); +} + +static PyObject * +textiowrapper_writable(textio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "writable", NULL); +} + +static PyObject * +textiowrapper_isatty(textio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + return PyObject_CallMethod(self->buffer, "isatty", NULL); +} + +static PyObject * +textiowrapper_flush(textio *self, PyObject *args) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + self->telling = self->seekable; + if 
(_textiowrapper_writeflush(self) < 0) + return NULL; + return PyObject_CallMethod(self->buffer, "flush", NULL); +} + +static PyObject * +textiowrapper_close(textio *self, PyObject *args) +{ + PyObject *res; + CHECK_INITIALIZED(self); + res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + if (res == NULL) { + /* If flush() fails, just give up */ + PyErr_Clear(); + } + else + Py_DECREF(res); + + return PyObject_CallMethod(self->buffer, "close", NULL); +} + +static PyObject * +textiowrapper_iternext(textio *self) +{ + PyObject *line; + + CHECK_INITIALIZED(self); + + self->telling = 0; + if (Py_TYPE(self) == &PyTextIOWrapper_Type) { + /* Skip method call overhead for speed */ + line = _textiowrapper_readline(self, -1); + } + else { + line = PyObject_CallMethodObjArgs((PyObject *)self, + _PyIO_str_readline, NULL); + if (line && !PyUnicode_Check(line)) { + PyErr_Format(PyExc_IOError, + "readline() should have returned an str object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyUnicode_GET_SIZE(line) == 0) { + /* Reached EOF or would have blocked */ + Py_DECREF(line); + Py_CLEAR(self->snapshot); + self->telling = self->seekable; + return NULL; + } + + return line; +} + +static PyObject * +textiowrapper_name_get(textio *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyObject_GetAttrString(self->buffer, "name"); +} + +static PyObject * +textiowrapper_closed_get(textio *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyObject_GetAttr(self->buffer, _PyIO_str_closed); +} + +static PyObject * +textiowrapper_newlines_get(textio *self, void *context) +{ + PyObject *res; + CHECK_INITIALIZED(self); + if (self->decoder == NULL) + Py_RETURN_NONE; + res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines); + if (res == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + Py_RETURN_NONE; + } + else { + return NULL; + } + } + return res; 
+} + +static PyObject * +textiowrapper_errors_get(textio *self, void *context) +{ + CHECK_INITIALIZED(self); + Py_INCREF(self->errors); + return self->errors; +} + +static PyObject * +textiowrapper_chunk_size_get(textio *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyLong_FromSsize_t(self->chunk_size); +} + +static int +textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context) +{ + Py_ssize_t n; + CHECK_INITIALIZED_INT(self); + n = PyNumber_AsSsize_t(arg, PyExc_TypeError); + if (n == -1 && PyErr_Occurred()) + return -1; + if (n <= 0) { + PyErr_SetString(PyExc_ValueError, + "a strictly positive integer is required"); + return -1; + } + self->chunk_size = n; + return 0; +} + +static PyMethodDef textiowrapper_methods[] = { + {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS}, + {"write", (PyCFunction)textiowrapper_write, METH_VARARGS}, + {"read", (PyCFunction)textiowrapper_read, METH_VARARGS}, + {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS}, + {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS}, + {"close", (PyCFunction)textiowrapper_close, METH_NOARGS}, + + {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS}, + {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS}, + {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS}, + {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS}, + {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS}, + + {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS}, + {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS}, + {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS}, + {NULL, NULL} +}; + +static PyMemberDef textiowrapper_members[] = { + {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY}, + {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY}, + {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY}, + {NULL} +}; + +static PyGetSetDef textiowrapper_getset[] = { + {"name", 
(getter)textiowrapper_name_get, NULL, NULL}, + {"closed", (getter)textiowrapper_closed_get, NULL, NULL}, +/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL}, +*/ + {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL}, + {"errors", (getter)textiowrapper_errors_get, NULL, NULL}, + {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get, + (setter)textiowrapper_chunk_size_set, NULL}, + {NULL} +}; + +PyTypeObject PyTextIOWrapper_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io.TextIOWrapper", /*tp_name*/ + sizeof(textio), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)textiowrapper_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tps_etattr*/ + 0, /*tp_compare */ + (reprfunc)textiowrapper_repr,/*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + textiowrapper_doc, /* tp_doc */ + (traverseproc)textiowrapper_traverse, /* tp_traverse */ + (inquiry)textiowrapper_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(textio, weakreflist), /*tp_weaklistoffset*/ + 0, /* tp_iter */ + (iternextfunc)textiowrapper_iternext, /* tp_iternext */ + textiowrapper_methods, /* tp_methods */ + textiowrapper_members, /* tp_members */ + textiowrapper_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(textio, dict), /*tp_dictoffset*/ + (initproc)textiowrapper_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; |