diff options
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/_iomodule.c | 163 | ||||
-rw-r--r-- | Modules/_io/_iomodule.h | 17 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 253 | ||||
-rw-r--r-- | Modules/_io/bytesio.c | 2 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 139 | ||||
-rw-r--r-- | Modules/_io/iobase.c | 51 | ||||
-rw-r--r-- | Modules/_io/stringio.c | 185 | ||||
-rw-r--r-- | Modules/_io/textio.c | 681 |
8 files changed, 887 insertions, 604 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 4c90433..31eea3c 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -1,9 +1,9 @@ /* An implementation of the new I/O lib as defined by PEP 3116 - "New I/O" - + Classes defined here: UnsupportedOperation, BlockingIOError. Functions defined here: open(). - + Mostly written by Amaury Forgeot d'Arc */ @@ -36,6 +36,7 @@ PyObject *_PyIO_str_nl; PyObject *_PyIO_str_read; PyObject *_PyIO_str_read1; PyObject *_PyIO_str_readable; +PyObject *_PyIO_str_readall; PyObject *_PyIO_str_readinto; PyObject *_PyIO_str_readline; PyObject *_PyIO_str_reset; @@ -90,94 +91,11 @@ PyDoc_STRVAR(module_doc, /* - * BlockingIOError extends IOError - */ - -static int -blockingioerror_init(PyBlockingIOErrorObject *self, PyObject *args, - PyObject *kwds) -{ - PyObject *myerrno = NULL, *strerror = NULL; - PyObject *baseargs = NULL; - Py_ssize_t written = 0; - - assert(PyTuple_Check(args)); - - self->written = 0; - if (!PyArg_ParseTuple(args, "OO|n:BlockingIOError", - &myerrno, &strerror, &written)) - return -1; - - baseargs = PyTuple_Pack(2, myerrno, strerror); - if (baseargs == NULL) - return -1; - /* This will take care of initializing of myerrno and strerror members */ - if (((PyTypeObject *)PyExc_IOError)->tp_init( - (PyObject *)self, baseargs, kwds) == -1) { - Py_DECREF(baseargs); - return -1; - } - Py_DECREF(baseargs); - - self->written = written; - return 0; -} - -static PyMemberDef blockingioerror_members[] = { - {"characters_written", T_PYSSIZET, offsetof(PyBlockingIOErrorObject, written), 0}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject _PyExc_BlockingIOError = { - PyVarObject_HEAD_INIT(NULL, 0) - "BlockingIOError", /*tp_name*/ - sizeof(PyBlockingIOErrorObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - 0, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare */ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - PyDoc_STR("Exception raised when I/O would block " - "on a non-blocking I/O stream"), /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - blockingioerror_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)blockingioerror_init, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ -}; -PyObject *PyExc_BlockingIOError = (PyObject *)&_PyExc_BlockingIOError; - - -/* * The main open() function */ PyDoc_STRVAR(open_doc, "open(file, mode='r', buffering=-1, encoding=None,\n" -" errors=None, newline=None, closefd=True) -> file object\n" +" errors=None, newline=None, closefd=True, opener=None) -> file object\n" "\n" "Open file and return a stream. Raise IOError upon failure.\n" "\n" @@ -190,18 +108,19 @@ PyDoc_STRVAR(open_doc, "mode is an optional string that specifies the mode in which the file\n" "is opened. It defaults to 'r' which means open for reading in text\n" "mode. Other common values are 'w' for writing (truncating the file if\n" -"it already exists), and 'a' for appending (which on some Unix systems,\n" -"means that all writes append to the end of the file regardless of the\n" -"current seek position). In text mode, if encoding is not specified the\n" -"encoding used is platform dependent. (For reading and writing raw\n" -"bytes use binary mode and leave encoding unspecified.) The available\n" -"modes are:\n" +"it already exists), 'x' for creating and writing to a new file, and\n" +"'a' for appending (which on some Unix systems, means that all writes\n" +"append to the end of the file regardless of the current seek position).\n" +"In text mode, if encoding is not specified the encoding used is platform\n" +"dependent. (For reading and writing raw bytes use binary mode and leave\n" +"encoding unspecified.) The available modes are:\n" "\n" "========= ===============================================================\n" "Character Meaning\n" "--------- ---------------------------------------------------------------\n" "'r' open for reading (default)\n" "'w' open for writing, truncating the file first\n" +"'x' create a new file and open it for writing\n" "'a' open for writing, appending to the end of the file if it exists\n" "'b' binary mode\n" "'t' text mode (default)\n" @@ -212,7 +131,8 @@ PyDoc_STRVAR(open_doc, "\n" "The default mode is 'rt' (open for reading text). For binary random\n" "access, the mode 'w+b' opens and truncates the file to 0 bytes, while\n" -"'r+b' opens the file without truncation.\n" +"'r+b' opens the file without truncation. The 'x' mode implies 'w' and\n" +"raises an `FileExistsError` if the file already exists.\n" "\n" "Python distinguishes between files opened in binary and text modes,\n" "even when the underlying operating system doesn't. Files opened in\n" @@ -272,6 +192,12 @@ PyDoc_STRVAR(open_doc, "when the file is closed. This does not work when a file name is given\n" "and must be True in that case.\n" "\n" +"A custom opener can be used by passing a callable as *opener*. The\n" +"underlying file descriptor for the file object is then obtained by\n" +"calling *opener* with (*file*, *flags*). *opener* must return an open\n" +"file descriptor (passing os.open as *opener* results in functionality\n" +"similar to passing None).\n" +"\n" "open() returns a file object whose type depends on the mode, and\n" "through which the standard file operations such as reading and writing\n" "are performed. When open() is used to open a file in a text mode ('w',\n" @@ -292,14 +218,14 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) { char *kwlist[] = {"file", "mode", "buffering", "encoding", "errors", "newline", - "closefd", NULL}; - PyObject *file; + "closefd", "opener", NULL}; + PyObject *file, *opener = Py_None; char *mode = "r"; int buffering = -1, closefd = 1; char *encoding = NULL, *errors = NULL, *newline = NULL; unsigned i; - int reading = 0, writing = 0, appending = 0, updating = 0; + int creating = 0, reading = 0, writing = 0, appending = 0, updating = 0; int text = 0, binary = 0, universal = 0; char rawmode[5], *m; @@ -307,10 +233,14 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) PyObject *raw, *modeobj = NULL, *buffer = NULL, *wrapper = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|sizzzi:open", kwlist, + _Py_IDENTIFIER(isatty); + _Py_IDENTIFIER(fileno); + _Py_IDENTIFIER(mode); + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|sizzziO:open", kwlist, &file, &mode, &buffering, &encoding, &errors, &newline, - &closefd)) { + &closefd, &opener)) { return NULL; } @@ -326,6 +256,9 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) char c = mode[i]; switch (c) { + case 'x': + creating = 1; + break; case 'r': reading = 1; break; @@ -362,6 +295,7 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) } m = rawmode; + if (creating) *(m++) = 'x'; if (reading) *(m++) = 'r'; if (writing) *(m++) = 'w'; if (appending) *(m++) = 'a'; @@ -384,9 +318,9 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (reading + writing + appending > 1) { + if (creating + reading + writing + appending > 1) { PyErr_SetString(PyExc_ValueError, - "must have exactly one of read/write/append mode"); + "must have exactly one of create/read/write/append mode"); return NULL; } @@ -410,7 +344,7 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) /* Create the Raw file stream */ raw = PyObject_CallFunction((PyObject *)&PyFileIO_Type, - "Osi", file, rawmode, closefd); + "OsiO", file, rawmode, closefd, opener); if (raw == NULL) return NULL; @@ -420,7 +354,7 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) /* buffering */ { - PyObject *res = PyObject_CallMethod(raw, "isatty", NULL); + PyObject *res = _PyObject_CallMethodId(raw, &PyId_isatty, NULL); if (res == NULL) goto error; isatty = PyLong_AsLong(res); @@ -442,7 +376,7 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) { struct stat st; long fileno; - PyObject *res = PyObject_CallMethod(raw, "fileno", NULL); + PyObject *res = _PyObject_CallMethodId(raw, &PyId_fileno, NULL); if (res == NULL) goto error; @@ -480,7 +414,7 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) if (updating) Buffered_class = (PyObject *)&PyBufferedRandom_Type; - else if (writing || appending) + else if (creating || writing || appending) Buffered_class = (PyObject *)&PyBufferedWriter_Type; else if (reading) Buffered_class = (PyObject *)&PyBufferedReader_Type; @@ -513,7 +447,7 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) if (wrapper == NULL) goto error; - if (PyObject_SetAttrString(wrapper, "mode", modeobj) < 0) + if (_PyObject_SetAttrId(wrapper, &PyId_mode, modeobj) < 0) goto error; Py_DECREF(modeobj); return wrapper; @@ -576,7 +510,7 @@ PyNumber_AsOff_t(PyObject *item, PyObject *err) /* Basically the "n" format code with the ability to turn None into -1. */ -int +int _PyIO_ConvertSsize_t(PyObject *obj, void *result) { Py_ssize_t limit; if (obj == Py_None) { @@ -603,7 +537,6 @@ iomodule_traverse(PyObject *mod, visitproc visit, void *arg) { _PyIO_State *state = IO_MOD_STATE(mod); if (!state->initialized) return 0; - Py_VISIT(state->os_module); if (state->locale_module != NULL) { Py_VISIT(state->locale_module); } @@ -617,7 +550,6 @@ iomodule_clear(PyObject *mod) { _PyIO_State *state = IO_MOD_STATE(mod); if (!state->initialized) return 0; - Py_CLEAR(state->os_module); if (state->locale_module != NULL) Py_CLEAR(state->locale_module); Py_CLEAR(state->unsupported_operation); @@ -661,11 +593,6 @@ PyInit__io(void) state = IO_MOD_STATE(m); state->initialized = 0; - /* put os in the module state */ - state->os_module = PyImport_ImportModule("os"); - if (state->os_module == NULL) - goto fail; - #define ADD_TYPE(type, name) \ if (PyType_Ready(type) < 0) \ goto fail; \ @@ -690,9 +617,11 @@ PyInit__io(void) state->unsupported_operation) < 0) goto fail; - /* BlockingIOError */ - _PyExc_BlockingIOError.tp_base = (PyTypeObject *) PyExc_IOError; - ADD_TYPE(&_PyExc_BlockingIOError, "BlockingIOError"); + /* BlockingIOError, for compatibility */ + Py_INCREF(PyExc_BlockingIOError); + if (PyModule_AddObject(m, "BlockingIOError", + (PyObject *) PyExc_BlockingIOError) < 0) + goto fail; /* Concrete base types of the IO ABCs. (the ABCs themselves are declared through inheritance in io.py) @@ -758,6 +687,7 @@ PyInit__io(void) ADD_INTERNED(read) ADD_INTERNED(read1) ADD_INTERNED(readable) + ADD_INTERNED(readall) ADD_INTERNED(readinto) ADD_INTERNED(readline) ADD_INTERNED(reset) @@ -788,7 +718,6 @@ PyInit__io(void) return m; fail: - Py_XDECREF(state->os_module); Py_XDECREF(state->unsupported_operation); Py_DECREF(m); return NULL; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 925e4f2..987aac8 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -50,25 +50,16 @@ extern PyObject *_PyIncrementalNewlineDecoder_decode( `*consumed`. If not found, returns -1 and sets `*consumed` to the number of characters which can be safely put aside until another search. - - NOTE: for performance reasons, `end` must point to a NUL character ('\0'). + + NOTE: for performance reasons, `end` must point to a NUL character ('\0'). Otherwise, the function will scan further and return garbage. */ extern Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, - Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed); + int kind, char *start, char *end, Py_ssize_t *consumed); #define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ -typedef struct { - PyException_HEAD - PyObject *myerrno; - PyObject *strerror; - PyObject *filename; /* Not used, but part of the IOError object */ - Py_ssize_t written; -} PyBlockingIOErrorObject; -PyAPI_DATA(PyObject *) PyExc_BlockingIOError; - /* * Offset type for positioning. */ @@ -133,7 +124,6 @@ extern PyModuleDef _PyIO_Module; typedef struct { int initialized; - PyObject *os_module; PyObject *locale_module; PyObject *unsupported_operation; @@ -155,6 +145,7 @@ extern PyObject *_PyIO_str_nl; extern PyObject *_PyIO_str_read; extern PyObject *_PyIO_str_read1; extern PyObject *_PyIO_str_readable; +extern PyObject *_PyIO_str_readall; extern PyObject *_PyIO_str_readinto; extern PyObject *_PyIO_str_readline; extern PyObject *_PyIO_str_reset; diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 4c43c69..8a9ae47 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1,9 +1,9 @@ /* An implementation of Buffered I/O as defined by PEP 3116 - "New I/O" - + Classes defined here: BufferedIOBase, BufferedReader, BufferedWriter, BufferedRandom. - + Written by Amaury Forgeot d'Arc and Antoine Pitrou */ @@ -13,6 +13,20 @@ #include "pythread.h" #include "_iomodule.h" +_Py_IDENTIFIER(close); +_Py_IDENTIFIER(_dealloc_warn); +_Py_IDENTIFIER(flush); +_Py_IDENTIFIER(isatty); +_Py_IDENTIFIER(mode); +_Py_IDENTIFIER(name); +_Py_IDENTIFIER(peek); +_Py_IDENTIFIER(read); +_Py_IDENTIFIER(read1); +_Py_IDENTIFIER(readable); +_Py_IDENTIFIER(readinto); +_Py_IDENTIFIER(writable); +_Py_IDENTIFIER(write); + /* * BufferedIOBase class, inherits from IOBase. */ @@ -38,12 +52,13 @@ bufferediobase_readinto(PyObject *self, PyObject *args) Py_buffer buf; Py_ssize_t len; PyObject *data; + _Py_IDENTIFIER(read); if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { return NULL; } - data = PyObject_CallMethod(self, "read", "n", buf.len); + data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len); if (data == NULL) goto error; @@ -198,7 +213,7 @@ typedef struct { int readable; int writable; int deallocating; - + /* True if this is a vanilla Buffered object (rather than a user derived class) *and* the raw stream is a vanilla FileIO object. */ int fast_closed_checks; @@ -237,7 +252,7 @@ typedef struct { /* Implementation notes: - + * BufferedReader, BufferedWriter and BufferedRandom try to share most methods (this is helped by the members `readable` and `writable`, which are initialized in the respective constructors) @@ -255,7 +270,7 @@ typedef struct { NOTE: we should try to maintain block alignment of reads and writes to the raw stream (according to the buffer size), but for now it is only done in read() and friends. - + */ /* These macros protect the buffered object against concurrent operations. */ @@ -410,7 +425,7 @@ buffered_dealloc_warn(buffered *self, PyObject *source) { if (self->ok && self->raw) { PyObject *r; - r = PyObject_CallMethod(self->raw, "_dealloc_warn", "O", source); + r = _PyObject_CallMethodId(self->raw, &PyId__dealloc_warn, "O", source); if (r) Py_DECREF(r); else @@ -543,14 +558,14 @@ static PyObject * buffered_name_get(buffered *self, void *context) { CHECK_INITIALIZED(self) - return PyObject_GetAttrString(self->raw, "name"); + return _PyObject_GetAttrId(self->raw, &PyId_name); } static PyObject * buffered_mode_get(buffered *self, void *context) { CHECK_INITIALIZED(self) - return PyObject_GetAttrString(self->raw, "mode"); + return _PyObject_GetAttrId(self->raw, &PyId_mode); } /* Lower-level APIs */ @@ -589,14 +604,15 @@ _bufferedreader_reset_buf(buffered *self); static void _bufferedwriter_reset_buf(buffered *self); static PyObject * -_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t); +_bufferedreader_peek_unlocked(buffered *self); static PyObject * _bufferedreader_read_all(buffered *self); static PyObject * _bufferedreader_read_fast(buffered *self, Py_ssize_t); static PyObject * _bufferedreader_read_generic(buffered *self, Py_ssize_t); - +static Py_ssize_t +_bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len); /* * Helpers @@ -620,14 +636,14 @@ static Py_ssize_t * _buffered_check_blocking_error(void) { PyObject *t, *v, *tb; - PyBlockingIOErrorObject *err; + PyOSErrorObject *err; PyErr_Fetch(&t, &v, &tb); if (v == NULL || !PyErr_GivenExceptionMatches(v, PyExc_BlockingIOError)) { PyErr_Restore(t, v, tb); return NULL; } - err = (PyBlockingIOErrorObject *) v; + err = (PyOSErrorObject *) v; /* TODO: sanity check (err->written >= 0) */ PyErr_Restore(t, v, tb); return &err->written; @@ -647,7 +663,7 @@ _buffered_raw_tell(buffered *self) if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, "Raw stream returned invalid position %" PY_PRIdOFF, - (PY_OFF_T_COMPAT)n); + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -680,7 +696,7 @@ _buffered_raw_seek(buffered *self, Py_off_t target, int whence) if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, "Raw stream returned invalid position %" PY_PRIdOFF, - (PY_OFF_T_COMPAT)n); + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -821,7 +837,7 @@ buffered_peek(buffered *self, PyObject *args) goto end; Py_CLEAR(res); } - res = _bufferedreader_peek_unlocked(self, n); + res = _bufferedreader_peek_unlocked(self); end: LEAVE_BUFFERED(self) @@ -885,61 +901,112 @@ buffered_read1(buffered *self, PyObject *args) if (n == 0) return PyBytes_FromStringAndSize(NULL, 0); - if (!ENTER_BUFFERED(self)) - return NULL; - /* Return up to n bytes. If at least one byte is buffered, we only return buffered bytes. Otherwise, we do one raw read. */ - /* XXX: this mimicks the io.py implementation but is probably wrong. - If we need to read from the raw stream, then we could actually read - all `n` bytes asked by the caller (and possibly more, so as to fill - our buffer for the next reads). */ - have = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); if (have > 0) { - if (n > have) - n = have; - res = PyBytes_FromStringAndSize(self->buffer + self->pos, n); - if (res == NULL) - goto end; - self->pos += n; - goto end; + n = Py_MIN(have, n); + res = _bufferedreader_read_fast(self, n); + assert(res != Py_None); + return res; } - - if (self->writable) { - res = buffered_flush_and_rewind_unlocked(self); - if (res == NULL) - goto end; + res = PyBytes_FromStringAndSize(NULL, n); + if (res == NULL) + return NULL; + if (!ENTER_BUFFERED(self)) { Py_DECREF(res); + return NULL; } - - /* Fill the buffer from the raw stream, and copy it to the result. */ _bufferedreader_reset_buf(self); - r = _bufferedreader_fill_buffer(self); - if (r == -1) - goto end; + r = _bufferedreader_raw_read(self, PyBytes_AS_STRING(res), n); + LEAVE_BUFFERED(self) + if (r == -1) { + Py_DECREF(res); + return NULL; + } if (r == -2) r = 0; if (n > r) - n = r; - res = PyBytes_FromStringAndSize(self->buffer, n); - if (res == NULL) - goto end; - self->pos = n; - -end: - LEAVE_BUFFERED(self) + _PyBytes_Resize(&res, r); return res; } static PyObject * buffered_readinto(buffered *self, PyObject *args) { + Py_buffer buf; + Py_ssize_t n, written = 0, remaining; + PyObject *res = NULL; + CHECK_INITIALIZED(self) - - /* TODO: use raw.readinto() (or a direct copy from our buffer) instead! */ - return bufferediobase_readinto((PyObject *)self, args); + + if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + return NULL; + + n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n > 0) { + if (n >= buf.len) { + memcpy(buf.buf, self->buffer + self->pos, buf.len); + self->pos += buf.len; + res = PyLong_FromSsize_t(buf.len); + goto end_unlocked; + } + memcpy(buf.buf, self->buffer + self->pos, n); + self->pos += n; + written = n; + } + + if (!ENTER_BUFFERED(self)) + goto end_unlocked; + + if (self->writable) { + res = buffered_flush_and_rewind_unlocked(self); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + + _bufferedreader_reset_buf(self); + self->pos = 0; + + for (remaining = buf.len - written; + remaining > 0; + written += n, remaining -= n) { + /* If remaining bytes is larger than internal buffer size, copy + * directly into caller's buffer. */ + if (remaining > self->buffer_size) { + n = _bufferedreader_raw_read(self, (char *) buf.buf + written, + remaining); + } + else { + n = _bufferedreader_fill_buffer(self); + if (n > 0) { + if (n > remaining) + n = remaining; + memcpy((char *) buf.buf + written, + self->buffer + self->pos, n); + self->pos += n; + continue; /* short circuit */ + } + } + if (n == 0 || (n == -2 && written > 0)) + break; + if (n < 0) { + if (n == -2) { + Py_INCREF(Py_None); + res = Py_None; + } + goto end; + } + } + res = PyLong_FromSsize_t(written); + +end: + LEAVE_BUFFERED(self); +end_unlocked: + PyBuffer_Release(&buf); + return res; } static PyObject * @@ -1231,7 +1298,7 @@ buffered_repr(buffered *self) { PyObject *nameobj, *res; - nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name); if (nameobj == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -1357,33 +1424,58 @@ static PyObject * _bufferedreader_read_all(buffered *self) { Py_ssize_t current_size; - PyObject *res, *data = NULL; - PyObject *chunks = PyList_New(0); - - if (chunks == NULL) - return NULL; + PyObject *res, *data = NULL, *chunk, *chunks; /* First copy what we have in the current buffer. */ current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); if (current_size) { data = PyBytes_FromStringAndSize( self->buffer + self->pos, current_size); - if (data == NULL) { - Py_DECREF(chunks); + if (data == NULL) return NULL; - } self->pos += current_size; } /* We're going past the buffer's bounds, flush it */ if (self->writable) { res = buffered_flush_and_rewind_unlocked(self); - if (res == NULL) { - Py_DECREF(chunks); + if (res == NULL) return NULL; - } Py_CLEAR(res); } _bufferedreader_reset_buf(self); + + if (PyObject_HasAttr(self->raw, _PyIO_str_readall)) { + chunk = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readall, NULL); + if (chunk == NULL) + return NULL; + if (chunk != Py_None && !PyBytes_Check(chunk)) { + Py_XDECREF(data); + Py_DECREF(chunk); + PyErr_SetString(PyExc_TypeError, "readall() should return bytes"); + return NULL; + } + if (chunk == Py_None) { + if (current_size == 0) + return chunk; + else { + Py_DECREF(chunk); + return data; + } + } + else if (current_size) { + PyBytes_Concat(&data, chunk); + Py_DECREF(chunk); + if (data == NULL) + return NULL; + return data; + } else + return chunk; + } + + chunks = PyList_New(0); + if (chunks == NULL) + return NULL; + while (1) { if (data) { if (PyList_Append(chunks, data) < 0) { @@ -1545,7 +1637,7 @@ error: } static PyObject * -_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t n) +_bufferedreader_peek_unlocked(buffered *self) { Py_ssize_t have, r; @@ -1587,6 +1679,7 @@ static PyMethodDef bufferedreader_methods[] = { {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, + {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, @@ -2135,13 +2228,13 @@ bufferedrwpair_dealloc(rwpair *self) } static PyObject * -_forward_call(buffered *self, const char *name, PyObject *args) +_forward_call(buffered *self, _Py_Identifier *name, PyObject *args) { - PyObject *func = PyObject_GetAttrString((PyObject *)self, name); + PyObject *func = _PyObject_GetAttrId((PyObject *)self, name); PyObject *ret; if (func == NULL) { - PyErr_SetString(PyExc_AttributeError, name); + PyErr_SetString(PyExc_AttributeError, name->string); return NULL; } @@ -2153,66 +2246,66 @@ _forward_call(buffered *self, const char *name, PyObject *args) static PyObject * bufferedrwpair_read(rwpair *self, PyObject *args) { - return _forward_call(self->reader, "read", args); + return _forward_call(self->reader, &PyId_read, args); } static PyObject * bufferedrwpair_peek(rwpair *self, PyObject *args) { - return _forward_call(self->reader, "peek", args); + return _forward_call(self->reader, &PyId_peek, args); } static PyObject * bufferedrwpair_read1(rwpair *self, PyObject *args) { - return _forward_call(self->reader, "read1", args); + return _forward_call(self->reader, &PyId_read1, args); } static PyObject * bufferedrwpair_readinto(rwpair *self, PyObject *args) { - return _forward_call(self->reader, "readinto", args); + return _forward_call(self->reader, &PyId_readinto, args); } static PyObject * bufferedrwpair_write(rwpair *self, PyObject *args) { - return _forward_call(self->writer, "write", args); + return _forward_call(self->writer, &PyId_write, args); } static PyObject * bufferedrwpair_flush(rwpair *self, PyObject *args) { - return _forward_call(self->writer, "flush", args); + return _forward_call(self->writer, &PyId_flush, args); } static PyObject * bufferedrwpair_readable(rwpair *self, PyObject *args) { - return _forward_call(self->reader, "readable", args); + return _forward_call(self->reader, &PyId_readable, args); } static PyObject * bufferedrwpair_writable(rwpair *self, PyObject *args) { - return _forward_call(self->writer, "writable", args); + return _forward_call(self->writer, &PyId_writable, args); } static PyObject * bufferedrwpair_close(rwpair *self, PyObject *args) { - PyObject *ret = _forward_call(self->writer, "close", args); + PyObject *ret = _forward_call(self->writer, &PyId_close, args); if (ret == NULL) return NULL; Py_DECREF(ret); - return _forward_call(self->reader, "close", args); + return _forward_call(self->reader, &PyId_close, args); } static PyObject * bufferedrwpair_isatty(rwpair *self, PyObject *args) { - PyObject *ret = _forward_call(self->writer, "isatty", args); + PyObject *ret = _forward_call(self->writer, &PyId_isatty, args); if (ret != Py_False) { /* either True or exception */ @@ -2220,7 +2313,7 @@ bufferedrwpair_isatty(rwpair *self, PyObject *args) } Py_DECREF(ret); - return _forward_call(self->reader, "isatty", args); + return _forward_call(self->reader, &PyId_isatty, args); } static PyObject * diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index b40513f..65ec931 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -938,13 +938,11 @@ static int bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags) { int ret; - void *ptr; bytesio *b = (bytesio *) obj->source; if (view == NULL) { b->exports++; return 0; } - ptr = (void *) obj; ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, 0, flags); if (ret >= 0) { diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index d5b03ee..34ff1e0 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -46,6 +46,7 @@ typedef struct { PyObject_HEAD int fd; + unsigned int created : 1; unsigned int readable : 1; unsigned int writable : 1; signed int seekable : 2; /* -1 means unknown */ @@ -122,6 +123,7 @@ internal_close(fileio *self) static PyObject * fileio_close(fileio *self) { + _Py_IDENTIFIER(close); if (!self->closefd) { self->fd = -1; Py_RETURN_NONE; @@ -137,8 +139,8 @@ fileio_close(fileio *self) if (errno < 0) return NULL; - return PyObject_CallMethod((PyObject*)&PyRawIOBase_Type, - "close", "O", self); + return _PyObject_CallMethodId((PyObject*)&PyRawIOBase_Type, + &PyId_close, "O", self); } static PyObject * @@ -151,6 +153,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self = (fileio *) type->tp_alloc(type, 0); if (self != NULL) { self->fd = -1; + self->created = 0; self->readable = 0; self->writable = 0; self->seekable = -1; @@ -211,9 +214,9 @@ static int fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) { fileio *self = (fileio *) oself; - static char *kwlist[] = {"file", "mode", "closefd", NULL}; + static char *kwlist[] = {"file", "mode", "closefd", "opener", NULL}; const char *name = NULL; - PyObject *nameobj, *stringobj = NULL; + PyObject *nameobj, *stringobj = NULL, *opener = Py_None; char *mode = "r"; char *s; #ifdef MS_WINDOWS @@ -232,8 +235,9 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) return -1; } - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:fileio", - kwlist, &nameobj, &mode, &closefd)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|siO:fileio", + kwlist, &nameobj, &mode, &closefd, + &opener)) return -1; if (PyFloat_Check(nameobj)) { @@ -260,9 +264,10 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) PyErr_SetString(PyExc_TypeError, "embedded NUL character"); return -1; } - widename = PyUnicode_AS_UNICODE(nameobj); - } - if (widename == NULL) + widename = PyUnicode_AsUnicode(nameobj); + if (widename == NULL) + return -1; + } else #endif if (fd < 0) { @@ -275,15 +280,23 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) s = mode; while (*s) { switch (*s++) { - case 'r': + case 'x': if (rwa) { bad_mode: PyErr_SetString(PyExc_ValueError, - "Must have exactly one of read/write/append " + "Must have exactly one of create/read/write/append " "mode and at most one plus"); goto error; } rwa = 1; + self->created = 1; + self->writable = 1; + flags |= O_EXCL | O_CREAT; + break; + case 'r': + if (rwa) + goto bad_mode; + rwa = 1; self->readable = 1; break; case 'w': @@ -349,19 +362,39 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) goto error; } - Py_BEGIN_ALLOW_THREADS errno = 0; + if (opener == Py_None) { + Py_BEGIN_ALLOW_THREADS #ifdef MS_WINDOWS - if (widename != NULL) - self->fd = _wopen(widename, flags, 0666); - else + if (widename != NULL) + self->fd = _wopen(widename, flags, 0666); + else #endif - self->fd = open(name, flags, 0666); - Py_END_ALLOW_THREADS + self->fd = open(name, flags, 0666); + Py_END_ALLOW_THREADS + } else { + PyObject *fdobj = PyObject_CallFunction( + opener, "Oi", nameobj, flags); + if (fdobj == NULL) + goto error; + if (!PyLong_Check(fdobj)) { + Py_DECREF(fdobj); + PyErr_SetString(PyExc_TypeError, + "expected integer from opener"); + goto error; + } + + self->fd = PyLong_AsLong(fdobj); + Py_DECREF(fdobj); + if (self->fd == -1) { + goto error; + } + } + if (self->fd < 0) { #ifdef MS_WINDOWS if (widename != NULL) - PyErr_SetFromErrnoWithUnicodeFilename(PyExc_IOError, widename); + PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, nameobj); else #endif PyErr_SetFromErrnoWithFilename(PyExc_IOError, name); @@ -533,21 +566,25 @@ fileio_readinto(fileio *self, PyObject *args) } static size_t -new_buffersize(fileio *self, size_t currentsize) +new_buffersize(fileio *self, size_t currentsize +#ifdef HAVE_FSTAT + , Py_off_t pos, Py_off_t end +#endif + ) { #ifdef HAVE_FSTAT - off_t pos, end; - struct stat st; - if (fstat(self->fd, &st) == 0) { - end = st.st_size; - pos = lseek(self->fd, 0L, SEEK_CUR); + if (end != (Py_off_t)-1) { /* Files claiming a size smaller than SMALLCHUNK may actually be streaming pseudo-files. In this case, we apply the more aggressive algorithm below. */ if (end >= SMALLCHUNK && end >= pos && pos >= 0) { /* Add 1 so if the file were to grow we'd notice. */ - return currentsize + end - pos + 1; + Py_off_t bufsize = currentsize + end - pos + 1; + if (bufsize < PY_SSIZE_T_MAX) + return (size_t)bufsize; + else + return PY_SSIZE_T_MAX; } } #endif @@ -560,9 +597,14 @@ new_buffersize(fileio *self, size_t currentsize) static PyObject * fileio_readall(fileio *self) { +#ifdef HAVE_FSTAT + struct stat st; + Py_off_t pos, end; +#endif PyObject *result; Py_ssize_t total = 0; int n; + size_t newsize; if (self->fd < 0) return err_closed(); @@ -573,8 +615,23 @@ fileio_readall(fileio *self) if (result == NULL) return NULL; +#ifdef HAVE_FSTAT +#if defined(MS_WIN64) || defined(MS_WINDOWS) + pos = _lseeki64(self->fd, 0L, SEEK_CUR); +#else + pos = lseek(self->fd, 0L, SEEK_CUR); +#endif + if (fstat(self->fd, &st) == 0) + end = st.st_size; + else + end = (Py_off_t)-1; +#endif while (1) { - size_t newsize = new_buffersize(self, total); +#ifdef HAVE_FSTAT + newsize = new_buffersize(self, total, pos, end); +#else + newsize = new_buffersize(self, total); +#endif if (newsize > PY_SSIZE_T_MAX || newsize <= 0) { PyErr_SetString(PyExc_OverflowError, "unbounded read returned more bytes " @@ -613,6 +670,9 @@ fileio_readall(fileio *self) return NULL; } total += n; +#ifdef HAVE_FSTAT + pos += n; +#endif } if (PyBytes_GET_SIZE(result) > total) { @@ -926,6 +986,12 @@ fileio_truncate(fileio *self, PyObject *args) static char * mode_string(fileio *self) { + if (self->created) { + if (self->readable) + return "xb+"; + else + return "xb"; + } if (self->readable) { if (self->writable) return "rb+"; @@ -939,12 +1005,13 @@ mode_string(fileio *self) static PyObject * fileio_repr(fileio *self) { + _Py_IDENTIFIER(name); PyObject *nameobj, *res; if (self->fd < 0) return PyUnicode_FromFormat("<_io.FileIO [closed]>"); - nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name); if (nameobj == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -984,13 +1051,19 @@ fileio_getstate(fileio *self) PyDoc_STRVAR(fileio_doc, -"file(name: str[, mode: str]) -> file IO object\n" +"file(name: str[, mode: str][, opener: None]) -> file IO object\n" "\n" -"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n" -"writing or appending. The file will be created if it doesn't exist\n" -"when opened for writing or appending; it will be truncated when\n" -"opened for writing. Add a '+' to the mode to allow simultaneous\n" -"reading and writing."); +"Open a file. The mode can be 'r', 'w', 'x' or 'a' for reading (default),\n" +"writing, exclusive creation or appending. The file will be created if it\n" +"doesn't exist when opened for writing or appending; it will be truncated\n" +"when opened for writing. A `FileExistsError` will be raised if it already\n" +"exists when opened for creating. Opening a file for creating implies\n" +"writing so this mode behaves in a similar way to 'w'.Add a '+' to the mode\n" +"to allow simultaneous reading and writing. A custom opener can be used by\n" +"passing a callable as *opener*. The underlying file descriptor for the file\n" +"object is then obtained by calling opener with (*name*, *flags*).\n" +"*opener* must return an open file descriptor (passing os.open as *opener*\n" +"results in functionality similar to passing None)."); PyDoc_STRVAR(read_doc, "read(size: int) -> bytes. read at most size bytes, returned as bytes.\n" diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 35c7cdd..b30bbb6 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -59,8 +59,9 @@ PyDoc_STRVAR(iobase_doc, of the IOBase object rather than the virtual `closed` attribute as returned by whatever subclass. */ +_Py_IDENTIFIER(__IOBase_closed); #define IS_CLOSED(self) \ - PyObject_HasAttrString(self, "__IOBase_closed") + _PyObject_HasAttrId(self, &PyId___IOBase_closed) /* Internal methods */ static PyObject * @@ -97,7 +98,9 @@ PyDoc_STRVAR(iobase_tell_doc, static PyObject * iobase_tell(PyObject *self, PyObject *args) { - return PyObject_CallMethod(self, "seek", "ii", 0, 1); + _Py_IDENTIFIER(seek); + + return _PyObject_CallMethodId(self, &PyId_seek, "ii", 0, 1); } PyDoc_STRVAR(iobase_truncate_doc, @@ -156,19 +159,6 @@ iobase_closed_get(PyObject *self, void *context) return PyBool_FromLong(IS_CLOSED(self)); } -static PyObject * -iobase_get_dict(PyObject *self) -{ - PyObject **dictptr = _PyObject_GetDictPtr(self); - PyObject *dict; - assert(dictptr); - dict = *dictptr; - if (dict == NULL) - dict = *dictptr = PyDict_New(); - Py_XINCREF(dict); - return dict; -} - PyObject * _PyIOBase_check_closed(PyObject *self, PyObject *args) { @@ -190,12 +180,13 @@ static PyObject * iobase_close(PyObject *self, PyObject *args) { PyObject *res; + _Py_IDENTIFIER(__IOBase_closed); if (IS_CLOSED(self)) Py_RETURN_NONE; res = PyObject_CallMethodObjArgs(self, _PyIO_str_flush, NULL); - PyObject_SetAttrString(self, "__IOBase_closed", Py_True); + _PyObject_SetAttrId(self, &PyId___IOBase_closed, Py_True); if (res == NULL) { return NULL; } @@ -464,12 +455,14 @@ iobase_readline(PyObject *self, PyObject *args) int has_peek = 0; PyObject *buffer, *result; Py_ssize_t old_size = -1; + _Py_IDENTIFIER(read); + _Py_IDENTIFIER(peek); if (!PyArg_ParseTuple(args, "|O&:readline", &_PyIO_ConvertSsize_t, &limit)) { return NULL; } - if (PyObject_HasAttrString(self, "peek")) + if (_PyObject_HasAttrId(self, &PyId_peek)) has_peek = 1; buffer = PyByteArray_FromStringAndSize(NULL, 0); @@ -481,7 +474,9 @@ iobase_readline(PyObject *self, PyObject *args) PyObject *b; if (has_peek) { - PyObject *readahead = PyObject_CallMethod(self, "peek", "i", 1); + _Py_IDENTIFIER(peek); + PyObject *readahead = _PyObject_CallMethodId(self, &PyId_peek, "i", 1); + if (readahead == NULL) goto fail; if (!PyBytes_Check(readahead)) { @@ -515,7 +510,7 @@ iobase_readline(PyObject *self, PyObject *args) Py_DECREF(readahead); } - b = PyObject_CallMethod(self, "read", "n", nreadahead); + b = _PyObject_CallMethodId(self, &PyId_read, "n", nreadahead); if (b == NULL) goto fail; if (!PyBytes_Check(b)) { @@ -601,7 +596,9 @@ iobase_readlines(PyObject *self, PyObject *args) /* XXX special-casing this made sense in the Python version in order to remove the bytecode interpretation overhead, but it could probably be removed here. */ - PyObject *ret = PyObject_CallMethod(result, "extend", "O", self); + _Py_IDENTIFIER(extend); + PyObject *ret = _PyObject_CallMethodId(result, &PyId_extend, "O", self); + if (ret == NULL) { Py_DECREF(result); return NULL; @@ -704,7 +701,7 @@ static PyMethodDef iobase_methods[] = { }; static PyGetSetDef iobase_getset[] = { - {"__dict__", (getter)iobase_get_dict, NULL, NULL}, + {"__dict__", PyObject_GenericGetDict, NULL, NULL}, {"closed", (getter)iobase_closed_get, NULL, NULL}, {NULL} }; @@ -781,8 +778,11 @@ rawiobase_read(PyObject *self, PyObject *args) return NULL; } - if (n < 0) - return PyObject_CallMethod(self, "readall", NULL); + if (n < 0) { + _Py_IDENTIFIER(readall); + + return _PyObject_CallMethodId(self, &PyId_readall, NULL); + } /* TODO: allocate a bytes object directly instead and manually construct a writable memoryview pointing to it. */ @@ -823,8 +823,9 @@ rawiobase_readall(PyObject *self, PyObject *args) return NULL; while (1) { - PyObject *data = PyObject_CallMethod(self, "read", - "i", DEFAULT_BUFFER_SIZE); + _Py_IDENTIFIER(read); + PyObject *data = _PyObject_CallMethodId(self, &PyId_read, + "i", DEFAULT_BUFFER_SIZE); if (!data) { Py_DECREF(chunks); return NULL; diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index c9d14b1..a1c31c0 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -1,19 +1,32 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "structmember.h" +#include "accu.h" #include "_iomodule.h" /* Implementation note: the buffer is always at least one character longer than the enclosed string, for proper functioning of _PyIO_find_line_ending. */ +#define STATE_REALIZED 1 +#define STATE_ACCUMULATING 2 + typedef struct { PyObject_HEAD - Py_UNICODE *buf; + Py_UCS4 *buf; Py_ssize_t pos; Py_ssize_t string_size; size_t buf_size; + /* The stringio object can be in two states: accumulating or realized. + In accumulating state, the internal buffer contains nothing and + the contents are given by the embedded _PyAccu structure. + In realized state, the internal buffer is meaningful and the + _PyAccu is destroyed. + */ + int state; + _PyAccu accu; + char ok; /* initialized? */ char closed; char readuniversal; @@ -21,7 +34,7 @@ typedef struct { PyObject *decoder; PyObject *readnl; PyObject *writenl; - + PyObject *dict; PyObject *weakreflist; } stringio; @@ -40,6 +53,11 @@ typedef struct { return NULL; \ } +#define ENSURE_REALIZED(self) \ + if (realize(self) < 0) { \ + return NULL; \ + } + PyDoc_STRVAR(stringio_doc, "Text I/O implementation using an in-memory buffer.\n" "\n" @@ -56,7 +74,7 @@ resize_buffer(stringio *self, size_t size) /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ size_t alloc = self->buf_size; - Py_UNICODE *new_buf = NULL; + Py_UCS4 *new_buf = NULL; assert(self->buf != NULL); @@ -84,10 +102,9 @@ resize_buffer(stringio *self, size_t size) alloc = size + 1; } - if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) + if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4)) goto overflow; - new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, - alloc * sizeof(Py_UNICODE)); + new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4)); if (new_buf == NULL) { PyErr_NoMemory(); return -1; @@ -103,14 +120,62 @@ resize_buffer(stringio *self, size_t size) return -1; } +static PyObject * +make_intermediate(stringio *self) +{ + PyObject *intermediate = _PyAccu_Finish(&self->accu); + self->state = STATE_REALIZED; + if (intermediate == NULL) + return NULL; + if (_PyAccu_Init(&self->accu) || + _PyAccu_Accumulate(&self->accu, intermediate)) { + Py_DECREF(intermediate); + return NULL; + } + self->state = STATE_ACCUMULATING; + return intermediate; +} + +static int +realize(stringio *self) +{ + Py_ssize_t len; + PyObject *intermediate; + + if (self->state == STATE_REALIZED) + return 0; + assert(self->state == STATE_ACCUMULATING); + self->state = STATE_REALIZED; + + intermediate = _PyAccu_Finish(&self->accu); + if (intermediate == NULL) + return -1; + + /* Append the intermediate string to the internal buffer. + The length should be equal to the current cursor position. + */ + len = PyUnicode_GET_LENGTH(intermediate); + if (resize_buffer(self, len) < 0) { + Py_DECREF(intermediate); + return -1; + } + if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) { + Py_DECREF(intermediate); + return -1; + } + + Py_DECREF(intermediate); + return 0; +} + /* Internal routine for writing a whole PyUnicode object to the buffer of a StringIO object. Returns 0 on success, or -1 on error. */ static Py_ssize_t write_str(stringio *self, PyObject *obj) { - Py_UNICODE *str; Py_ssize_t len; PyObject *decoded = NULL; + assert(self->buf != NULL); assert(self->pos >= 0); @@ -132,9 +197,11 @@ write_str(stringio *self, PyObject *obj) return -1; assert(PyUnicode_Check(decoded)); - str = PyUnicode_AS_UNICODE(decoded); - len = PyUnicode_GET_SIZE(decoded); - + if (PyUnicode_READY(decoded)) { + Py_DECREF(decoded); + return -1; + } + len = PyUnicode_GET_LENGTH(decoded); assert(len >= 0); /* This overflow check is not strictly necessary. However, it avoids us to @@ -145,6 +212,17 @@ write_str(stringio *self, PyObject *obj) "new position too large"); goto fail; } + + if (self->state == STATE_ACCUMULATING) { + if (self->string_size == self->pos) { + if (_PyAccu_Accumulate(&self->accu, decoded)) + goto fail; + goto success; + } + if (realize(self)) + goto fail; + } + if (self->pos + len > self->string_size) { if (resize_buffer(self, self->pos + len) < 0) goto fail; @@ -161,18 +239,22 @@ write_str(stringio *self, PyObject *obj) */ memset(self->buf + self->string_size, '\0', - (self->pos - self->string_size) * sizeof(Py_UNICODE)); + (self->pos - self->string_size) * sizeof(Py_UCS4)); } /* Copy the data to the internal buffer, overwriting some of the existing data if self->pos < self->string_size. */ - memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); - self->pos += len; + if (!PyUnicode_AsUCS4(decoded, + self->buf + self->pos, + self->buf_size - self->pos, + 0)) + goto fail; +success: /* Set the new length of the internal string if it has changed. */ - if (self->string_size < self->pos) { + self->pos += len; + if (self->string_size < self->pos) self->string_size = self->pos; - } Py_DECREF(decoded); return 0; @@ -190,7 +272,10 @@ stringio_getvalue(stringio *self) { CHECK_INITIALIZED(self); CHECK_CLOSED(self); - return PyUnicode_FromUnicode(self->buf, self->string_size); + if (self->state == STATE_ACCUMULATING) + return make_intermediate(self); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf, + self->string_size); } PyDoc_STRVAR(stringio_tell_doc, @@ -214,7 +299,7 @@ static PyObject * stringio_read(stringio *self, PyObject *args) { Py_ssize_t size, n; - Py_UNICODE *output; + Py_UCS4 *output; PyObject *arg = Py_None; CHECK_INITIALIZED(self); @@ -245,21 +330,29 @@ stringio_read(stringio *self, PyObject *args) size = 0; } + /* Optimization for seek(0); read() */ + if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) { + PyObject *result = make_intermediate(self); + self->pos = self->string_size; + return result; + } + + ENSURE_REALIZED(self); output = self->buf + self->pos; self->pos += size; - return PyUnicode_FromUnicode(output, size); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size); } /* Internal helper, used by stringio_readline and stringio_iternext */ static PyObject * _stringio_readline(stringio *self, Py_ssize_t limit) { - Py_UNICODE *start, *end, old_char; + Py_UCS4 *start, *end, old_char; Py_ssize_t len, consumed; /* In case of overseek, return the empty string */ if (self->pos >= self->string_size) - return PyUnicode_FromString(""); + return PyUnicode_New(0, 0); start = self->buf + self->pos; if (limit < 0 || limit > self->string_size - self->pos) @@ -270,14 +363,14 @@ _stringio_readline(stringio *self, Py_ssize_t limit) *end = '\0'; len = _PyIO_find_line_ending( self->readtranslate, self->readuniversal, self->readnl, - start, end, &consumed); + PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed); *end = old_char; /* If we haven't found any line ending, we just return everything (`consumed` is ignored). */ if (len < 0) len = limit; self->pos += len; - return PyUnicode_FromUnicode(start, len); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len); } PyDoc_STRVAR(stringio_readline_doc, @@ -295,6 +388,7 @@ stringio_readline(stringio *self, PyObject *args) if (!PyArg_ParseTuple(args, "|O:readline", &arg)) return NULL; CHECK_CLOSED(self); + ENSURE_REALIZED(self); if (PyNumber_Check(arg)) { limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError); @@ -316,6 +410,7 @@ stringio_iternext(stringio *self) CHECK_INITIALIZED(self); CHECK_CLOSED(self); + ENSURE_REALIZED(self); if (Py_TYPE(self) == &PyStringIO_Type) { /* Skip method call overhead for speed */ @@ -337,7 +432,7 @@ stringio_iternext(stringio *self) if (line == NULL) return NULL; - if (PyUnicode_GET_SIZE(line) == 0) { + if (PyUnicode_GET_LENGTH(line) == 0) { /* Reached EOF */ Py_DECREF(line); return NULL; @@ -386,6 +481,7 @@ stringio_truncate(stringio *self, PyObject *args) } if (size < self->string_size) { + ENSURE_REALIZED(self); if (resize_buffer(self, size) < 0) return NULL; self->string_size = size; @@ -462,8 +558,10 @@ stringio_write(stringio *self, PyObject *obj) Py_TYPE(obj)->tp_name); return NULL; } + if (PyUnicode_READY(obj)) + return NULL; CHECK_CLOSED(self); - size = PyUnicode_GET_SIZE(obj); + size = PyUnicode_GET_LENGTH(obj); if (size > 0 && write_str(self, obj) < 0) return NULL; @@ -484,6 +582,7 @@ stringio_close(stringio *self) /* Free up some memory */ if (resize_buffer(self, 0) < 0) return NULL; + _PyAccu_Destroy(&self->accu); Py_CLEAR(self->readnl); Py_CLEAR(self->writenl); Py_CLEAR(self->decoder); @@ -513,6 +612,7 @@ stringio_dealloc(stringio *self) PyMem_Free(self->buf); self->buf = NULL; } + _PyAccu_Destroy(&self->accu); Py_CLEAR(self->readnl); Py_CLEAR(self->writenl); Py_CLEAR(self->decoder); @@ -535,7 +635,7 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* tp_alloc initializes all the fields to zero. So we don't have to initialize them here. */ - self->buf = (Py_UNICODE *)PyMem_Malloc(0); + self->buf = (Py_UCS4 *)PyMem_Malloc(0); if (self->buf == NULL) { Py_DECREF(self); return PyErr_NoMemory(); @@ -551,6 +651,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds) PyObject *value = NULL; PyObject *newline_obj = NULL; char *newline = "\n"; + Py_ssize_t value_len; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist, &value, &newline_obj)) @@ -592,6 +693,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds) self->ok = 0; + _PyAccu_Destroy(&self->accu); Py_CLEAR(self->readnl); Py_CLEAR(self->writenl); Py_CLEAR(self->decoder); @@ -628,19 +730,27 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds) /* Now everything is set up, resize buffer to size of initial value, and copy it */ self->string_size = 0; - if (value && value != Py_None) { - Py_ssize_t len = PyUnicode_GetSize(value); + if (value && value != Py_None) + value_len = PyUnicode_GetLength(value); + else + value_len = 0; + if (value_len > 0) { /* This is a heuristic, for newline translation might change the string length. */ - if (resize_buffer(self, len) < 0) + if (resize_buffer(self, 0) < 0) return -1; + self->state = STATE_REALIZED; self->pos = 0; if (write_str(self, value) < 0) return -1; } else { + /* Empty stringio object, we can start by accumulating */ if (resize_buffer(self, 0) < 0) return -1; + if (_PyAccu_Init(&self->accu)) + return -1; + self->state = STATE_ACCUMULATING; } self->pos = 0; @@ -747,11 +857,22 @@ stringio_setstate(stringio *self, PyObject *state) once by __init__. So we do not take any chance and replace object's buffer completely. */ { - Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0)); - Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0)); - if (resize_buffer(self, bufsize) < 0) + PyObject *item; + Py_UCS4 *buf; + Py_ssize_t bufsize; + + item = PyTuple_GET_ITEM(state, 0); + buf = PyUnicode_AsUCS4Copy(item); + if (buf == NULL) return NULL; - memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE)); + bufsize = PyUnicode_GET_LENGTH(item); + + if (resize_buffer(self, bufsize) < 0) { + PyMem_Free(buf); + return NULL; + } + memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); + PyMem_Free(buf); self->string_size = bufsize; } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index baf0a97..833a527 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -11,6 +11,27 @@ #include "structmember.h" #include "_iomodule.h" +_Py_IDENTIFIER(close); +_Py_IDENTIFIER(_dealloc_warn); +_Py_IDENTIFIER(decode); +_Py_IDENTIFIER(fileno); +_Py_IDENTIFIER(flush); +_Py_IDENTIFIER(getpreferredencoding); +_Py_IDENTIFIER(isatty); +_Py_IDENTIFIER(mode); +_Py_IDENTIFIER(name); +_Py_IDENTIFIER(raw); +_Py_IDENTIFIER(read); +_Py_IDENTIFIER(read1); +_Py_IDENTIFIER(readable); +_Py_IDENTIFIER(replace); +_Py_IDENTIFIER(reset); +_Py_IDENTIFIER(seek); +_Py_IDENTIFIER(seekable); +_Py_IDENTIFIER(setstate); +_Py_IDENTIFIER(tell); +_Py_IDENTIFIER(writable); + /* TextIOBase */ PyDoc_STRVAR(textiobase_doc, @@ -274,18 +295,26 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, goto error; } - output_len = PyUnicode_GET_SIZE(output); + if (PyUnicode_READY(output) == -1) + goto error; + + output_len = PyUnicode_GET_LENGTH(output); if (self->pendingcr && (final || output_len > 0)) { - Py_UNICODE *out; - PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1); + /* Prefix output with CR */ + int kind; + PyObject *modified; + char *out; + + modified = PyUnicode_New(output_len + 1, + PyUnicode_MAX_CHAR_VALUE(output)); if (modified == NULL) goto error; - out = PyUnicode_AS_UNICODE(modified); - out[0] = '\r'; - memcpy(out + 1, PyUnicode_AS_UNICODE(output), - output_len * sizeof(Py_UNICODE)); + kind = PyUnicode_KIND(modified); + out = PyUnicode_DATA(modified); + PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r'); + memcpy(out + kind, PyUnicode_DATA(output), kind * output_len); Py_DECREF(output); - output = modified; + output = modified; /* output remains ready */ self->pendingcr = 0; output_len++; } @@ -295,21 +324,13 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, */ if (!final) { if (output_len > 0 - && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') { - - if (Py_REFCNT(output) == 1) { - if (PyUnicode_Resize(&output, output_len - 1) < 0) - goto error; - } - else { - PyObject *modified = PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(output), - output_len - 1); - if (modified == NULL) - goto error; - Py_DECREF(output); - output = modified; - } + && PyUnicode_READ_CHAR(output, output_len - 1) == '\r') + { + PyObject *modified = PyUnicode_Substring(output, 0, output_len -1); + if (modified == NULL) + goto error; + Py_DECREF(output); + output = modified; self->pendingcr = 1; } } @@ -317,13 +338,15 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, /* Record which newlines are read and do newline translation if desired, all in one pass. */ { - Py_UNICODE *in_str; + void *in_str; Py_ssize_t len; int seennl = self->seennl; int only_lf = 0; + int kind; - in_str = PyUnicode_AS_UNICODE(output); - len = PyUnicode_GET_SIZE(output); + in_str = PyUnicode_DATA(output); + len = PyUnicode_GET_LENGTH(output); + kind = PyUnicode_KIND(output); if (len == 0) return output; @@ -332,7 +355,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, for the \r *byte* with the libc's optimized memchr. */ if (seennl == SEEN_LF || seennl == 0) { - only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL); + only_lf = (memchr(in_str, '\r', kind * len) == NULL); } if (only_lf) { @@ -340,51 +363,51 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, (there's nothing else to be done, even when in translation mode) */ if (seennl == 0 && - memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) { - Py_UNICODE *s, *end; - s = in_str; - end = in_str + len; - for (;;) { - Py_UNICODE c; - /* Fast loop for non-control characters */ - while (*s > '\n') - s++; - c = *s++; - if (c == '\n') { - seennl |= SEEN_LF; - break; + memchr(in_str, '\n', kind * len) != NULL) { + if (kind == PyUnicode_1BYTE_KIND) + seennl |= SEEN_LF; + else { + Py_ssize_t i = 0; + for (;;) { + Py_UCS4 c; + /* Fast loop for non-control characters */ + while (PyUnicode_READ(kind, in_str, i) > '\n') + i++; + c = PyUnicode_READ(kind, in_str, i++); + if (c == '\n') { + seennl |= SEEN_LF; + break; + } + if (i >= len) + break; } - if (s > end) - break; } } /* Finished: we have scanned for newlines, and none of them need translating */ } else if (!self->translate) { - Py_UNICODE *s, *end; + Py_ssize_t i = 0; /* We have already seen all newline types, no need to scan again */ if (seennl == SEEN_ALL) goto endscan; - s = in_str; - end = in_str + len; for (;;) { - Py_UNICODE c; + Py_UCS4 c; /* Fast loop for non-control characters */ - while (*s > '\r') - s++; - c = *s++; + while (PyUnicode_READ(kind, in_str, i) > '\r') + i++; + c = PyUnicode_READ(kind, in_str, i++); if (c == '\n') seennl |= SEEN_LF; else if (c == '\r') { - if (*s == '\n') { + if (PyUnicode_READ(kind, in_str, i) == '\n') { seennl |= SEEN_CRLF; - s++; + i++; } else seennl |= SEEN_CR; } - if (s > end) + if (i >= len) break; if (seennl == SEEN_ALL) break; @@ -393,61 +416,51 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, ; } else { - PyObject *translated = NULL; - Py_UNICODE *out_str; - Py_UNICODE *in, *out, *end; - if (Py_REFCNT(output) != 1) { - /* We could try to optimize this so that we only do a copy - when there is something to translate. On the other hand, - most decoders should only output non-shared strings, i.e. - translation is done in place. */ - translated = PyUnicode_FromUnicode(NULL, len); - if (translated == NULL) - goto error; - assert(Py_REFCNT(translated) == 1); - memcpy(PyUnicode_AS_UNICODE(translated), - PyUnicode_AS_UNICODE(output), - len * sizeof(Py_UNICODE)); - } - else { - translated = output; + void *translated; + int kind = PyUnicode_KIND(output); + void *in_str = PyUnicode_DATA(output); + Py_ssize_t in, out; + /* XXX: Previous in-place translation here is disabled as + resizing is not possible anymore */ + /* We could try to optimize this so that we only do a copy + when there is something to translate. On the other hand, + we already know there is a \r byte, so chances are high + that something needs to be done. */ + translated = PyMem_Malloc(kind * len); + if (translated == NULL) { + PyErr_NoMemory(); + goto error; } - out_str = PyUnicode_AS_UNICODE(translated); - in = in_str; - out = out_str; - end = in_str + len; + in = out = 0; for (;;) { - Py_UNICODE c; + Py_UCS4 c; /* Fast loop for non-control characters */ - while ((c = *in++) > '\r') - *out++ = c; + while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r') + PyUnicode_WRITE(kind, translated, out++, c); if (c == '\n') { - *out++ = c; + PyUnicode_WRITE(kind, translated, out++, c); seennl |= SEEN_LF; continue; } if (c == '\r') { - if (*in == '\n') { + if (PyUnicode_READ(kind, in_str, in) == '\n') { in++; seennl |= SEEN_CRLF; } else seennl |= SEEN_CR; - *out++ = '\n'; + PyUnicode_WRITE(kind, translated, out++, '\n'); continue; } - if (in > end) + if (in > len) break; - *out++ = c; - } - if (translated != output) { - Py_DECREF(output); - output = translated; - } - if (out - out_str != len) { - if (PyUnicode_Resize(&output, out - out_str) < 0) - goto error; + PyUnicode_WRITE(kind, translated, out++, c); } + Py_DECREF(output); + output = PyUnicode_FromKindAndData(kind, translated, out); + PyMem_Free(translated); + if (!output) + goto error; } self->seennl |= seennl; } @@ -514,8 +527,8 @@ incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state) flag >>= 1; if (self->decoder != Py_None) - return PyObject_CallMethod(self->decoder, - "setstate", "((OK))", buffer, flag); + return _PyObject_CallMethodId(self->decoder, + &PyId_setstate, "((OK))", buffer, flag); else Py_RETURN_NONE; } @@ -680,12 +693,16 @@ typedef struct PyObject *pending_bytes; /* list of bytes objects waiting to be written, or NULL */ Py_ssize_t pending_bytes_count; - PyObject *snapshot; + /* snapshot is either None, or a tuple (dec_flags, next_input) where * dec_flags is the second (integer) item of the decoder state and * next_input is the chunk of input bytes that comes next after the * snapshot point. We use this to reconstruct decoder states in tell(). */ + PyObject *snapshot; + /* Bytes-to-characters ratio for the current chunk. Serves as input for + the heuristic in tell(). */ + double b2cratio; /* Cache raw object if it's a FileIO object */ PyObject *raw; @@ -701,25 +718,21 @@ typedef struct static PyObject * ascii_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors)); } static PyObject * utf16be_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors), 1); + return _PyUnicode_EncodeUTF16(text, + PyBytes_AS_STRING(self->errors), 1); } static PyObject * utf16le_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors), -1); + return _PyUnicode_EncodeUTF16(text, + PyBytes_AS_STRING(self->errors), -1); } static PyObject * @@ -733,25 +746,22 @@ utf16_encode(textio *self, PyObject *text) return utf16le_encode(self, text); #endif } - return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors), 0); + return _PyUnicode_EncodeUTF16(text, + PyBytes_AS_STRING(self->errors), 0); } static PyObject * utf32be_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors), 1); + return _PyUnicode_EncodeUTF32(text, + PyBytes_AS_STRING(self->errors), 1); } static PyObject * utf32le_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors), -1); + return _PyUnicode_EncodeUTF32(text, + PyBytes_AS_STRING(self->errors), -1); } static PyObject * @@ -765,25 +775,20 @@ utf32_encode(textio *self, PyObject *text) return utf32le_encode(self, text); #endif } - return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors), 0); + return _PyUnicode_EncodeUTF32(text, + PyBytes_AS_STRING(self->errors), 0); } static PyObject * utf8_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors)); } static PyObject * latin1_encode(textio *self, PyObject *text) { - return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), - PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors)); } /* Map normalized encoding names onto the specialized encoding funcs */ @@ -852,11 +857,12 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) self->decoded_chars_used = 0; self->pending_bytes_count = 0; self->encodefunc = NULL; + self->b2cratio = 0.0; if (encoding == NULL) { /* Try os.device_encoding(fileno) */ PyObject *fileno; - fileno = PyObject_CallMethod(buffer, "fileno", NULL); + fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL); /* Ignore only AttributeError and UnsupportedOperation */ if (fileno == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError) || @@ -868,9 +874,13 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) } } else { - self->encoding = PyObject_CallMethod(state->os_module, - "device_encoding", - "N", fileno); + int fd = (int) PyLong_AsLong(fileno); + Py_DECREF(fileno); + if (fd == -1 && PyErr_Occurred()) { + goto error; + } + + self->encoding = _Py_device_encoding(fd); if (self->encoding == NULL) goto error; else if (!PyUnicode_Check(self->encoding)) @@ -887,8 +897,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) } else { use_locale: - self->encoding = PyObject_CallMethod( - state->locale_module, "getpreferredencoding", NULL); + self->encoding = _PyObject_CallMethodId( + state->locale_module, &PyId_getpreferredencoding, NULL); if (self->encoding == NULL) { catch_ImportError: /* @@ -953,7 +963,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) #endif /* Build the decoder object */ - res = PyObject_CallMethod(buffer, "readable", NULL); + res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL); if (res == NULL) goto error; r = PyObject_IsTrue(res); @@ -978,7 +988,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) } /* Build the encoder object */ - res = PyObject_CallMethod(buffer, "writable", NULL); + res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL); if (res == NULL) goto error; r = PyObject_IsTrue(res); @@ -995,7 +1005,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ci = _PyCodec_Lookup(encoding); if (ci == NULL) goto error; - res = PyObject_GetAttrString(ci, "name"); + res = _PyObject_GetAttrId(ci, &PyId_name); Py_DECREF(ci); if (res == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) @@ -1022,7 +1032,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) if (Py_TYPE(buffer) == &PyBufferedReader_Type || Py_TYPE(buffer) == &PyBufferedWriter_Type || Py_TYPE(buffer) == &PyBufferedRandom_Type) { - raw = PyObject_GetAttrString(buffer, "raw"); + raw = _PyObject_GetAttrId(buffer, &PyId_raw); /* Cache the raw FileIO object to speed up 'closed' checks */ if (raw == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) @@ -1036,13 +1046,13 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) Py_DECREF(raw); } - res = PyObject_CallMethod(buffer, "seekable", NULL); + res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL); if (res == NULL) goto error; self->seekable = self->telling = PyObject_IsTrue(res); Py_DECREF(res); - self->has_read1 = PyObject_HasAttrString(buffer, "read1"); + self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1); self->encoding_start_of_stream = 0; if (self->seekable && self->encoder) { @@ -1208,18 +1218,6 @@ textiowrapper_detach(textio *self) return buffer; } -Py_LOCAL_INLINE(const Py_UNICODE *) -findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch) -{ - /* like wcschr, but doesn't stop at NULL characters */ - while (size-- > 0) { - if (*s == ch) - return s; - s++; - } - return NULL; -} - /* Flush the internal write buffer. This doesn't explicitly flush the underlying buffered object, though. */ static int @@ -1264,6 +1262,9 @@ textiowrapper_write(textio *self, PyObject *args) return NULL; } + if (PyUnicode_READY(text) == -1) + return NULL; + CHECK_CLOSED(self); if (self->encoder == NULL) @@ -1271,16 +1272,15 @@ textiowrapper_write(textio *self, PyObject *args) Py_INCREF(text); - textlen = PyUnicode_GetSize(text); + textlen = PyUnicode_GET_LENGTH(text); if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) - if (findchar(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), '\n')) + if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1) haslf = 1; if (haslf && self->writetranslate && self->writenl != NULL) { - PyObject *newtext = PyObject_CallMethod( - text, "replace", "ss", "\n", self->writenl); + PyObject *newtext = _PyObject_CallMethodId( + text, &PyId_replace, "ss", "\n", self->writenl); Py_DECREF(text); if (newtext == NULL) return NULL; @@ -1291,8 +1291,7 @@ textiowrapper_write(textio *self, PyObject *args) needflush = 1; else if (self->line_buffering && (haslf || - findchar(PyUnicode_AS_UNICODE(text), - PyUnicode_GET_SIZE(text), '\r'))) + PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1)) needflush = 1; /* XXX What if we were just reading? */ @@ -1336,7 +1335,7 @@ textiowrapper_write(textio *self, PyObject *args) Py_CLEAR(self->snapshot); if (self->decoder) { - ret = PyObject_CallMethod(self->decoder, "reset", NULL); + ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL); if (ret == NULL) return NULL; Py_DECREF(ret); @@ -1364,7 +1363,8 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) if (self->decoded_chars == NULL) return PyUnicode_FromStringAndSize(NULL, 0); - avail = (PyUnicode_GET_SIZE(self->decoded_chars) + /* decoded_chars is guaranteed to be "ready". */ + avail = (PyUnicode_GET_LENGTH(self->decoded_chars) - self->decoded_chars_used); assert(avail >= 0); @@ -1373,9 +1373,9 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) n = avail; if (self->decoded_chars_used > 0 || n < avail) { - chars = PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(self->decoded_chars) - + self->decoded_chars_used, n); + chars = PyUnicode_Substring(self->decoded_chars, + self->decoded_chars_used, + self->decoded_chars_used + n); if (chars == NULL) return NULL; } @@ -1391,12 +1391,13 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) /* Read and decode the next chunk of data from the BufferedReader. */ static int -textiowrapper_read_chunk(textio *self) +textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) { PyObject *dec_buffer = NULL; PyObject *dec_flags = NULL; PyObject *input_chunk = NULL; PyObject *decoded_chars, *chunk_size; + Py_ssize_t nbytes, nchars; int eof; /* The return value is True unless EOF was reached. The decoded string is @@ -1432,7 +1433,10 @@ textiowrapper_read_chunk(textio *self) } /* Read a chunk, decode it, and put the result in self._decoded_chars. */ - chunk_size = PyLong_FromSsize_t(self->chunk_size); + if (size_hint > 0) { + size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint); + } + chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); if (chunk_size == NULL) goto fail; input_chunk = PyObject_CallMethodObjArgs(self->buffer, @@ -1443,7 +1447,8 @@ textiowrapper_read_chunk(textio *self) goto fail; assert(PyBytes_Check(input_chunk)); - eof = (PyBytes_Size(input_chunk) == 0); + nbytes = PyBytes_Size(input_chunk); + eof = (nbytes == 0); if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { decoded_chars = _PyIncrementalNewlineDecoder_decode( @@ -1457,8 +1462,15 @@ textiowrapper_read_chunk(textio *self) /* TODO sanity check: isinstance(decoded_chars, unicode) */ if (decoded_chars == NULL) goto fail; + if (PyUnicode_READY(decoded_chars) == -1) + goto fail; textiowrapper_set_decoded_chars(self, decoded_chars); - if (PyUnicode_GET_SIZE(decoded_chars) > 0) + nchars = PyUnicode_GET_LENGTH(decoded_chars); + if (nchars > 0) + self->b2cratio = (double) nbytes / nchars; + else + self->b2cratio = 0.0; + if (nchars > 0) eof = 0; if (self->telling) { @@ -1505,12 +1517,17 @@ textiowrapper_read(textio *self, PyObject *args) if (n < 0) { /* Read everything */ - PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL); + PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL); PyObject *decoded; if (bytes == NULL) goto fail; - decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, - bytes, Py_True, NULL); + + if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) + decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, + bytes, 1); + else + decoded = PyObject_CallMethodObjArgs( + self->decoder, _PyIO_str_decode, bytes, Py_True, NULL); Py_DECREF(bytes); if (decoded == NULL) goto fail; @@ -1536,11 +1553,13 @@ textiowrapper_read(textio *self, PyObject *args) result = textiowrapper_get_decoded_chars(self, n); if (result == NULL) goto fail; - remaining -= PyUnicode_GET_SIZE(result); + if (PyUnicode_READY(result) == -1) + goto fail; + remaining -= PyUnicode_GET_LENGTH(result); /* Keep reading chunks until we have n characters to return */ while (remaining > 0) { - res = textiowrapper_read_chunk(self); + res = textiowrapper_read_chunk(self, remaining); if (res < 0) goto fail; if (res == 0) /* EOF */ @@ -1550,13 +1569,14 @@ textiowrapper_read(textio *self, PyObject *args) if (chunks == NULL) goto fail; } - if (PyList_Append(chunks, result) < 0) + if (PyUnicode_GET_LENGTH(result) > 0 && + PyList_Append(chunks, result) < 0) goto fail; Py_DECREF(result); result = textiowrapper_get_decoded_chars(self, remaining); if (result == NULL) goto fail; - remaining -= PyUnicode_GET_SIZE(result); + remaining -= PyUnicode_GET_LENGTH(result); } if (chunks != NULL) { if (result != NULL && PyList_Append(chunks, result) < 0) @@ -1576,36 +1596,39 @@ textiowrapper_read(textio *self, PyObject *args) } -/* NOTE: `end` must point to the real end of the Py_UNICODE storage, +/* NOTE: `end` must point to the real end of the Py_UCS4 storage, that is to the NUL character. Otherwise the function will produce incorrect results. */ -static Py_UNICODE * -find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch) +static char * +find_control_char(int kind, char *s, char *end, Py_UCS4 ch) { - Py_UNICODE *s = start; + if (kind == PyUnicode_1BYTE_KIND) { + assert(ch < 256); + return (char *) memchr((void *) s, (char) ch, end - s); + } for (;;) { - while (*s > ch) - s++; - if (*s == ch) + while (PyUnicode_READ(kind, s, 0) > ch) + s += kind; + if (PyUnicode_READ(kind, s, 0) == ch) return s; if (s == end) return NULL; - s++; + s += kind; } } Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, - Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed) + int kind, char *start, char *end, Py_ssize_t *consumed) { - Py_ssize_t len = end - start; + Py_ssize_t len = ((char*)end - (char*)start)/kind; if (translated) { /* Newlines are already translated, only search for \n */ - Py_UNICODE *pos = find_control_char(start, end, '\n'); + char *pos = find_control_char(kind, start, end, '\n'); if (pos != NULL) - return pos - start + 1; + return (pos - start)/kind + 1; else { *consumed = len; return -1; @@ -1615,63 +1638,66 @@ _PyIO_find_line_ending( /* Universal newline search. Find any of \r, \r\n, \n * The decoder ensures that \r\n are not split in two pieces */ - Py_UNICODE *s = start; + char *s = start; for (;;) { - Py_UNICODE ch; + Py_UCS4 ch; /* Fast path for non-control chars. The loop always ends - since the Py_UNICODE storage is NUL-terminated. */ - while (*s > '\r') - s++; + since the Unicode string is NUL-terminated. */ + while (PyUnicode_READ(kind, s, 0) > '\r') + s += kind; if (s >= end) { *consumed = len; return -1; } - ch = *s++; + ch = PyUnicode_READ(kind, s, 0); + s += kind; if (ch == '\n') - return s - start; + return (s - start)/kind; if (ch == '\r') { - if (*s == '\n') - return s - start + 1; + if (PyUnicode_READ(kind, s, 0) == '\n') + return (s - start)/kind + 1; else - return s - start; + return (s - start)/kind; } } } else { /* Non-universal mode. */ - Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl); - Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl); + Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); + char *nl = PyUnicode_DATA(readnl); + /* Assume that readnl is an ASCII character. */ + assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); if (readnl_len == 1) { - Py_UNICODE *pos = find_control_char(start, end, nl[0]); + char *pos = find_control_char(kind, start, end, nl[0]); if (pos != NULL) - return pos - start + 1; + return (pos - start)/kind + 1; *consumed = len; return -1; } else { - Py_UNICODE *s = start; - Py_UNICODE *e = end - readnl_len + 1; - Py_UNICODE *pos; + char *s = start; + char *e = end - (readnl_len - 1)*kind; + char *pos; if (e < s) e = s; while (s < e) { Py_ssize_t i; - Py_UNICODE *pos = find_control_char(s, end, nl[0]); + char *pos = find_control_char(kind, s, end, nl[0]); if (pos == NULL || pos >= e) break; for (i = 1; i < readnl_len; i++) { - if (pos[i] != nl[i]) + if (PyUnicode_READ(kind, pos, i) != nl[i]) break; } if (i == readnl_len) - return pos - start + readnl_len; - s = pos + 1; + return (pos - start)/kind + readnl_len; + s = pos + kind; } - pos = find_control_char(e, end, nl[0]); + pos = find_control_char(kind, e, end, nl[0]); if (pos == NULL) *consumed = len; else - *consumed = pos - start; + *consumed = (pos - start)/kind; return -1; } } @@ -1692,15 +1718,16 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) chunked = 0; while (1) { - Py_UNICODE *ptr; + char *ptr; Py_ssize_t line_len; + int kind; Py_ssize_t consumed = 0; /* First, get some data if necessary */ res = 1; while (!self->decoded_chars || - !PyUnicode_GET_SIZE(self->decoded_chars)) { - res = textiowrapper_read_chunk(self); + !PyUnicode_GET_LENGTH(self->decoded_chars)) { + res = textiowrapper_read_chunk(self, 0); if (res < 0) goto error; if (res == 0) @@ -1724,18 +1751,24 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) assert(self->decoded_chars_used == 0); line = PyUnicode_Concat(remaining, self->decoded_chars); start = 0; - offset_to_buffer = PyUnicode_GET_SIZE(remaining); + offset_to_buffer = PyUnicode_GET_LENGTH(remaining); Py_CLEAR(remaining); if (line == NULL) goto error; + if (PyUnicode_READY(line) == -1) + goto error; } - ptr = PyUnicode_AS_UNICODE(line); - line_len = PyUnicode_GET_SIZE(line); + ptr = PyUnicode_DATA(line); + line_len = PyUnicode_GET_LENGTH(line); + kind = PyUnicode_KIND(line); endpos = _PyIO_find_line_ending( self->readtranslate, self->readuniversal, self->readnl, - ptr + start, ptr + line_len, &consumed); + kind, + ptr + kind * start, + ptr + kind * line_len, + &consumed); if (endpos >= 0) { endpos += start; if (limit >= 0 && (endpos - start) + chunked >= limit) @@ -1759,21 +1792,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) if (chunks == NULL) goto error; } - s = PyUnicode_FromUnicode(ptr + start, endpos - start); + s = PyUnicode_Substring(line, start, endpos); if (s == NULL) goto error; if (PyList_Append(chunks, s) < 0) { Py_DECREF(s); goto error; } - chunked += PyUnicode_GET_SIZE(s); + chunked += PyUnicode_GET_LENGTH(s); Py_DECREF(s); } /* There may be some remaining bytes we'll have to prepend to the next chunk of data */ if (endpos < line_len) { - remaining = PyUnicode_FromUnicode( - ptr + endpos, line_len - endpos); + remaining = PyUnicode_Substring(line, endpos, line_len); if (remaining == NULL) goto error; } @@ -1785,19 +1817,12 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) if (line != NULL) { /* Our line ends in the current buffer */ self->decoded_chars_used = endpos - offset_to_buffer; - if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) { - if (start == 0 && Py_REFCNT(line) == 1) { - if (PyUnicode_Resize(&line, endpos) < 0) - goto error; - } - else { - PyObject *s = PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(line) + start, endpos - start); - Py_CLEAR(line); - if (s == NULL) - goto error; - line = s; - } + if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) { + PyObject *s = PyUnicode_Substring(line, start, endpos); + Py_CLEAR(line); + if (s == NULL) + goto error; + line = s; } } if (remaining != NULL) { @@ -1811,16 +1836,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) Py_CLEAR(remaining); } if (chunks != NULL) { - if (line != NULL && PyList_Append(chunks, line) < 0) - goto error; - Py_CLEAR(line); + if (line != NULL) { + if (PyList_Append(chunks, line) < 0) + goto error; + Py_DECREF(line); + } line = PyUnicode_Join(_PyIO_empty_str, chunks); if (line == NULL) goto error; - Py_DECREF(chunks); + Py_CLEAR(chunks); + } + if (line == NULL) { + Py_INCREF(_PyIO_empty_str); + line = _PyIO_empty_str; } - if (line == NULL) - line = PyUnicode_FromStringAndSize(NULL, 0); return line; @@ -1943,8 +1972,8 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) if (cookie->start_pos == 0 && cookie->dec_flags == 0) res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); else - res = PyObject_CallMethod(self->decoder, "setstate", - "((yi))", "", cookie->dec_flags); + res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, + "((yi))", "", cookie->dec_flags); if (res == NULL) return -1; Py_DECREF(res); @@ -2008,13 +2037,12 @@ textiowrapper_seek(textio *self, PyObject *args) * sync the underlying buffer with the current position. */ Py_DECREF(cookieObj); - cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL); + cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL); if (cookieObj == NULL) goto fail; } else if (whence == 2) { /* seek relative to end of file */ - cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); if (cmp < 0) goto fail; @@ -2024,7 +2052,7 @@ textiowrapper_seek(textio *self, PyObject *args) goto fail; } - res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); if (res == NULL) goto fail; Py_DECREF(res); @@ -2032,13 +2060,13 @@ textiowrapper_seek(textio *self, PyObject *args) textiowrapper_set_decoded_chars(self, NULL); Py_CLEAR(self->snapshot); if (self->decoder) { - res = PyObject_CallMethod(self->decoder, "reset", NULL); + res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL); if (res == NULL) goto fail; Py_DECREF(res); } - res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2); + res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2); Py_XDECREF(cookieObj); return res; } @@ -2091,8 +2119,8 @@ textiowrapper_seek(textio *self, PyObject *args) if (cookie.chars_to_skip) { /* Just like _read_chunk, feed the decoder and save a snapshot. */ - PyObject *input_chunk = PyObject_CallMethod( - self->buffer, "read", "i", cookie.bytes_to_feed); + PyObject *input_chunk = _PyObject_CallMethodId( + self->buffer, &PyId_read, "i", cookie.bytes_to_feed); PyObject *decoded; if (input_chunk == NULL) @@ -2106,16 +2134,20 @@ textiowrapper_seek(textio *self, PyObject *args) goto fail; } - decoded = PyObject_CallMethod(self->decoder, "decode", - "Oi", input_chunk, (int)cookie.need_eof); + decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode, + "Oi", input_chunk, (int)cookie.need_eof); if (decoded == NULL) goto fail; + if (PyUnicode_READY(decoded) == -1) { + Py_DECREF(decoded); + goto fail; + } textiowrapper_set_decoded_chars(self, decoded); /* Skip chars_to_skip of the decoded characters. */ - if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) { + if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) { PyErr_SetString(PyExc_IOError, "can't restore logical file position"); goto fail; } @@ -2147,8 +2179,12 @@ textiowrapper_tell(textio *self, PyObject *args) cookie_type cookie = {0,0,0,0,0}; PyObject *next_input; Py_ssize_t chars_to_skip, chars_decoded; + Py_ssize_t skip_bytes, skip_back; PyObject *saved_state = NULL; char *input, *input_end; + char *dec_buffer; + Py_ssize_t dec_buffer_len; + int dec_flags; CHECK_INITIALIZED(self); CHECK_CLOSED(self); @@ -2165,17 +2201,17 @@ textiowrapper_tell(textio *self, PyObject *args) if (_textiowrapper_writeflush(self) < 0) return NULL; - res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); if (res == NULL) goto fail; Py_DECREF(res); - posobj = PyObject_CallMethod(self->buffer, "tell", NULL); + posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL); if (posobj == NULL) goto fail; if (self->decoder == NULL || self->snapshot == NULL) { - assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0); + assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0); return posobj; } @@ -2184,6 +2220,7 @@ textiowrapper_tell(textio *self, PyObject *args) #else cookie.start_pos = PyLong_AsLong(posobj); #endif + Py_DECREF(posobj); if (PyErr_Occurred()) goto fail; @@ -2198,57 +2235,99 @@ textiowrapper_tell(textio *self, PyObject *args) /* How many decoded characters have been used up since the snapshot? */ if (self->decoded_chars_used == 0) { /* We haven't moved from the snapshot point. */ - Py_DECREF(posobj); return textiowrapper_build_cookie(&cookie); } chars_to_skip = self->decoded_chars_used; - /* Starting from the snapshot position, we will walk the decoder - * forward until it gives us enough decoded characters. - */ + /* Decoder state will be restored at the end */ saved_state = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_getstate, NULL); if (saved_state == NULL) goto fail; - /* Note our initial start point. */ - if (_textiowrapper_decoder_setstate(self, &cookie) < 0) - goto fail; +#define DECODER_GETSTATE() do { \ + PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \ + _PyIO_str_getstate, NULL); \ + if (_state == NULL) \ + goto fail; \ + if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \ + Py_DECREF(_state); \ + goto fail; \ + } \ + Py_DECREF(_state); \ + } while (0) + + /* TODO: replace assert with exception */ +#define DECODER_DECODE(start, len, res) do { \ + PyObject *_decoded = _PyObject_CallMethodId( \ + self->decoder, &PyId_decode, "y#", start, len); \ + if (_decoded == NULL) \ + goto fail; \ + assert (PyUnicode_Check(_decoded)); \ + res = PyUnicode_GET_LENGTH(_decoded); \ + Py_DECREF(_decoded); \ + } while (0) + + /* Fast search for an acceptable start point, close to our + current pos */ + skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); + skip_back = 1; + assert(skip_back <= PyBytes_GET_SIZE(next_input)); + input = PyBytes_AS_STRING(next_input); + while (skip_bytes > 0) { + /* Decode up to temptative start point */ + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + DECODER_DECODE(input, skip_bytes, chars_decoded); + if (chars_decoded <= chars_to_skip) { + DECODER_GETSTATE(); + if (dec_buffer_len == 0) { + /* Before pos and no bytes buffered in decoder => OK */ + cookie.dec_flags = dec_flags; + chars_to_skip -= chars_decoded; + break; + } + /* Skip back by buffered amount and reset heuristic */ + skip_bytes -= dec_buffer_len; + skip_back = 1; + } + else { + /* We're too far ahead, skip back a bit */ + skip_bytes -= skip_back; + skip_back *= 2; + } + } + if (skip_bytes <= 0) { + skip_bytes = 0; + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + } - /* Feed the decoder one byte at a time. As we go, note the - * nearest "safe start point" before the current location - * (a point where the decoder has nothing buffered, so seek() + /* Note our initial start point. */ + cookie.start_pos += skip_bytes; + cookie.chars_to_skip = chars_to_skip; + if (chars_to_skip == 0) + goto finally; + + /* We should be close to the desired position. Now feed the decoder one + * byte at a time until we reach the `chars_to_skip` target. + * As we go, note the nearest "safe start point" before the current + * location (a point where the decoder has nothing buffered, so seek() * can safely start from there and advance to this location). */ chars_decoded = 0; input = PyBytes_AS_STRING(next_input); input_end = input + PyBytes_GET_SIZE(next_input); + input += skip_bytes; while (input < input_end) { - PyObject *state; - char *dec_buffer; - Py_ssize_t dec_buffer_len; - int dec_flags; - - PyObject *decoded = PyObject_CallMethod( - self->decoder, "decode", "y#", input, 1); - if (decoded == NULL) - goto fail; - assert (PyUnicode_Check(decoded)); - chars_decoded += PyUnicode_GET_SIZE(decoded); - Py_DECREF(decoded); + Py_ssize_t n; + DECODER_DECODE(input, 1, n); + /* We got n chars for 1 byte */ + chars_decoded += n; cookie.bytes_to_feed += 1; - - state = PyObject_CallMethodObjArgs(self->decoder, - _PyIO_str_getstate, NULL); - if (state == NULL) - goto fail; - if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { - Py_DECREF(state); - goto fail; - } - Py_DECREF(state); + DECODER_GETSTATE(); if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { /* Decoder buffer is empty, so this is a safe start point. */ @@ -2264,12 +2343,12 @@ textiowrapper_tell(textio *self, PyObject *args) } if (input == input_end) { /* We didn't get enough decoded data; signal EOF to get more. */ - PyObject *decoded = PyObject_CallMethod( - self->decoder, "decode", "yi", "", /* final = */ 1); + PyObject *decoded = _PyObject_CallMethodId( + self->decoder, &PyId_decode, "yi", "", /* final = */ 1); if (decoded == NULL) goto fail; assert (PyUnicode_Check(decoded)); - chars_decoded += PyUnicode_GET_SIZE(decoded); + chars_decoded += PyUnicode_GET_LENGTH(decoded); Py_DECREF(decoded); cookie.need_eof = 1; @@ -2280,9 +2359,8 @@ textiowrapper_tell(textio *self, PyObject *args) } } - /* finally */ - Py_XDECREF(posobj); - res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); +finally: + res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state); Py_DECREF(saved_state); if (res == NULL) return NULL; @@ -2292,13 +2370,12 @@ textiowrapper_tell(textio *self, PyObject *args) cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); return textiowrapper_build_cookie(&cookie); - fail: - Py_XDECREF(posobj); +fail: if (saved_state) { PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); - res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); + res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state); Py_DECREF(saved_state); if (res == NULL) return NULL; @@ -2338,7 +2415,7 @@ textiowrapper_repr(textio *self) res = PyUnicode_FromString("<_io.TextIOWrapper"); if (res == NULL) return NULL; - nameobj = PyObject_GetAttrString((PyObject *) self, "name"); + nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name); if (nameobj == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -2354,7 +2431,7 @@ textiowrapper_repr(textio *self) if (res == NULL) return NULL; } - modeobj = PyObject_GetAttrString((PyObject *) self, "mode"); + modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode); if (modeobj == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -2386,35 +2463,35 @@ static PyObject * textiowrapper_fileno(textio *self, PyObject *args) { CHECK_INITIALIZED(self); - return PyObject_CallMethod(self->buffer, "fileno", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL); } static PyObject * textiowrapper_seekable(textio *self, PyObject *args) { CHECK_INITIALIZED(self); - return PyObject_CallMethod(self->buffer, "seekable", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL); } static PyObject * textiowrapper_readable(textio *self, PyObject *args) { CHECK_INITIALIZED(self); - return PyObject_CallMethod(self->buffer, "readable", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL); } static PyObject * textiowrapper_writable(textio *self, PyObject *args) { CHECK_INITIALIZED(self); - return PyObject_CallMethod(self->buffer, "writable", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL); } static PyObject * textiowrapper_isatty(textio *self, PyObject *args) { CHECK_INITIALIZED(self); - return PyObject_CallMethod(self->buffer, "isatty", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL); } static PyObject * @@ -2433,7 +2510,7 @@ textiowrapper_flush(textio *self, PyObject *args) self->telling = self->seekable; if (_textiowrapper_writeflush(self) < 0) return NULL; - return PyObject_CallMethod(self->buffer, "flush", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL); } static PyObject * @@ -2456,20 +2533,20 @@ textiowrapper_close(textio *self, PyObject *args) } else { if (self->deallocating) { - res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self); + res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self); if (res) Py_DECREF(res); else PyErr_Clear(); } - res = PyObject_CallMethod((PyObject *)self, "flush", NULL); + res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); if (res == NULL) { return NULL; } else Py_DECREF(res); - return PyObject_CallMethod(self->buffer, "close", NULL); + return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL); } } @@ -2497,10 +2574,10 @@ textiowrapper_iternext(textio *self) } } - if (line == NULL) + if (line == NULL || PyUnicode_READY(line) == -1) return NULL; - if (PyUnicode_GET_SIZE(line) == 0) { + if (PyUnicode_GET_LENGTH(line) == 0) { /* Reached EOF or would have blocked */ Py_DECREF(line); Py_CLEAR(self->snapshot); @@ -2515,7 +2592,7 @@ static PyObject * textiowrapper_name_get(textio *self, void *context) { CHECK_INITIALIZED(self); - return PyObject_GetAttrString(self->buffer, "name"); + return _PyObject_GetAttrId(self->buffer, &PyId_name); } static PyObject * |