diff options
author | INADA Naoki <methane@users.noreply.github.com> | 2017-12-21 00:59:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-21 00:59:53 (GMT) |
commit | 507434fd504f3ebc1da72aa77544edc0d73f136e (patch) | |
tree | 521c4ca2eeff16a7cee8594cbca095d9bdec9181 /Modules/_io | |
parent | 31e99080f6f8cf7faaba9fe3a4e0996e49163317 (diff) | |
download | cpython-507434fd504f3ebc1da72aa77544edc0d73f136e.zip cpython-507434fd504f3ebc1da72aa77544edc0d73f136e.tar.gz cpython-507434fd504f3ebc1da72aa77544edc0d73f136e.tar.bz2 |
bpo-15216: io: TextIOWrapper.reconfigure() accepts encoding, errors and newline (GH-2343)
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/clinic/textio.c.h | 25 | ||||
-rw-r--r-- | Modules/_io/textio.c | 454 |
2 files changed, 329 insertions, 150 deletions
diff --git a/Modules/_io/clinic/textio.c.h b/Modules/_io/clinic/textio.c.h index 53ac0de..60f5dab 100644 --- a/Modules/_io/clinic/textio.c.h +++ b/Modules/_io/clinic/textio.c.h @@ -149,7 +149,7 @@ PyDoc_STRVAR(_io_TextIOWrapper___init____doc__, static int _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, - const char *encoding, const char *errors, + const char *encoding, PyObject *errors, const char *newline, int line_buffering, int write_through); @@ -158,10 +158,10 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs) { int return_value = -1; static const char * const _keywords[] = {"buffer", "encoding", "errors", "newline", "line_buffering", "write_through", NULL}; - static _PyArg_Parser _parser = {"O|zzzii:TextIOWrapper", _keywords, 0}; + static _PyArg_Parser _parser = {"O|zOzii:TextIOWrapper", _keywords, 0}; PyObject *buffer; const char *encoding = NULL; - const char *errors = NULL; + PyObject *errors = Py_None; const char *newline = NULL; int line_buffering = 0; int write_through = 0; @@ -177,7 +177,8 @@ exit: } PyDoc_STRVAR(_io_TextIOWrapper_reconfigure__doc__, -"reconfigure($self, /, *, line_buffering=None, write_through=None)\n" +"reconfigure($self, /, *, encoding=None, errors=None, newline=None,\n" +" line_buffering=None, write_through=None)\n" "--\n" "\n" "Reconfigure the text stream with new parameters.\n" @@ -188,7 +189,8 @@ PyDoc_STRVAR(_io_TextIOWrapper_reconfigure__doc__, {"reconfigure", (PyCFunction)_io_TextIOWrapper_reconfigure, METH_FASTCALL|METH_KEYWORDS, _io_TextIOWrapper_reconfigure__doc__}, static PyObject * -_io_TextIOWrapper_reconfigure_impl(textio *self, +_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, + PyObject *errors, PyObject *newline_obj, PyObject *line_buffering_obj, PyObject *write_through_obj); @@ -196,16 +198,19 @@ static PyObject * _io_TextIOWrapper_reconfigure(textio *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - static const char * const _keywords[] = {"line_buffering", "write_through", NULL}; - static _PyArg_Parser _parser = {"|$OO:reconfigure", _keywords, 0}; + static const char * const _keywords[] = {"encoding", "errors", "newline", "line_buffering", "write_through", NULL}; + static _PyArg_Parser _parser = {"|$OOOOO:reconfigure", _keywords, 0}; + PyObject *encoding = Py_None; + PyObject *errors = Py_None; + PyObject *newline_obj = NULL; PyObject *line_buffering_obj = Py_None; PyObject *write_through_obj = Py_None; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &line_buffering_obj, &write_through_obj)) { + &encoding, &errors, &newline_obj, &line_buffering_obj, &write_through_obj)) { goto exit; } - return_value = _io_TextIOWrapper_reconfigure_impl(self, line_buffering_obj, write_through_obj); + return_value = _io_TextIOWrapper_reconfigure_impl(self, encoding, errors, newline_obj, line_buffering_obj, write_through_obj); exit: return return_value; @@ -499,4 +504,4 @@ _io_TextIOWrapper_close(textio *self, PyObject *Py_UNUSED(ignored)) { return _io_TextIOWrapper_close_impl(self); } -/*[clinic end generated code: output=679b3ac5284df4e0 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b5be870b0039d577 input=a9049054013a1b77]*/ diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 5239e85..6800d2d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -36,6 +36,7 @@ _Py_IDENTIFIER(reset); _Py_IDENTIFIER(seek); _Py_IDENTIFIER(seekable); _Py_IDENTIFIER(setstate); +_Py_IDENTIFIER(strict); _Py_IDENTIFIER(tell); _Py_IDENTIFIER(writable); @@ -252,14 +253,14 @@ _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, Py_INCREF(decoder); if (errors == NULL) { - self->errors = PyUnicode_FromString("strict"); + self->errors = _PyUnicode_FromId(&PyId_strict); if (self->errors == NULL) return -1; } else { - Py_INCREF(errors); self->errors = errors; } + Py_INCREF(self->errors); self->translate = translate; self->seennl = 0; @@ -647,7 +648,7 @@ typedef struct PyObject *decoder; PyObject *readnl; PyObject *errors; - const char *writenl; /* utf-8 encoded, NULL stands for \n */ + const char *writenl; /* ASCII-encoded; NULL stands for \n */ char line_buffering; char write_through; char readuniversal; @@ -700,21 +701,21 @@ typedef struct static PyObject * ascii_encode(textio *self, PyObject *text) { - return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors)); } static PyObject * utf16be_encode(textio *self, PyObject *text) { return _PyUnicode_EncodeUTF16(text, - PyBytes_AS_STRING(self->errors), 1); + PyUnicode_AsUTF8(self->errors), 1); } static PyObject * utf16le_encode(textio *self, PyObject *text) { return _PyUnicode_EncodeUTF16(text, - PyBytes_AS_STRING(self->errors), -1); + PyUnicode_AsUTF8(self->errors), -1); } static PyObject * @@ -729,21 +730,21 @@ utf16_encode(textio *self, PyObject *text) #endif } return _PyUnicode_EncodeUTF16(text, - PyBytes_AS_STRING(self->errors), 0); + PyUnicode_AsUTF8(self->errors), 0); } static PyObject * utf32be_encode(textio *self, PyObject *text) { return _PyUnicode_EncodeUTF32(text, - PyBytes_AS_STRING(self->errors), 1); + PyUnicode_AsUTF8(self->errors), 1); } static PyObject * utf32le_encode(textio *self, PyObject *text) { return _PyUnicode_EncodeUTF32(text, - PyBytes_AS_STRING(self->errors), -1); + PyUnicode_AsUTF8(self->errors), -1); } static PyObject * @@ -758,19 +759,19 @@ utf32_encode(textio *self, PyObject *text) #endif } return _PyUnicode_EncodeUTF32(text, - PyBytes_AS_STRING(self->errors), 0); + PyUnicode_AsUTF8(self->errors), 0); } static PyObject * utf8_encode(textio *self, PyObject *text) { - return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors)); } static PyObject * latin1_encode(textio *self, PyObject *text) { - return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors)); + return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors)); } /* Map normalized encoding names onto the specialized encoding funcs */ @@ -793,12 +794,198 @@ static const encodefuncentry encodefuncs[] = { {NULL, NULL} }; +static int +validate_newline(const char *newline) +{ + if (newline && newline[0] != '\0' + && !(newline[0] == '\n' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { + PyErr_Format(PyExc_ValueError, + "illegal newline value: %s", newline); + return -1; + } + return 0; +} + +static int +set_newline(textio *self, const char *newline) +{ + PyObject *old = self->readnl; + if (newline == NULL) { + self->readnl = NULL; + } + else { + self->readnl = PyUnicode_FromString(newline); + if (self->readnl == NULL) { + self->readnl = old; + return -1; + } + } + self->readuniversal = (newline == NULL || newline[0] == '\0'); + self->readtranslate = (newline == NULL); + self->writetranslate = (newline == NULL || newline[0] != '\0'); + if (!self->readuniversal && self->readnl != NULL) { + // validate_newline() accepts only ASCII newlines. + assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND); + self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl); + if (strcmp(self->writenl, "\n") == 0) { + self->writenl = NULL; + } + } + else { +#ifdef MS_WINDOWS + self->writenl = "\r\n"; +#else + self->writenl = NULL; +#endif + } + Py_XDECREF(old); + return 0; +} + +static int +_textiowrapper_set_decoder(textio *self, PyObject *codec_info, + const char *errors) +{ + PyObject *res; + int r; + + res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL); + if (res == NULL) + return -1; + + r = PyObject_IsTrue(res); + Py_DECREF(res); + if (r == -1) + return -1; + + if (r != 1) + return 0; + + Py_CLEAR(self->decoder); + self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors); + if (self->decoder == NULL) + return -1; + + if (self->readuniversal) { + PyObject *incrementalDecoder = PyObject_CallFunction( + (PyObject *)&PyIncrementalNewlineDecoder_Type, + "Oi", self->decoder, (int)self->readtranslate); + if (incrementalDecoder == NULL) + return -1; + Py_CLEAR(self->decoder); + self->decoder = incrementalDecoder; + } + + return 0; +} + +static PyObject* +_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof) +{ + PyObject *chars; + + if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type) + chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof); + else + chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes, + eof ? Py_True : Py_False, NULL); + + if (check_decoded(chars) < 0) + // check_decoded already decreases refcount + return NULL; + + return chars; +} + +static int +_textiowrapper_set_encoder(textio *self, PyObject *codec_info, + const char *errors) +{ + PyObject *res; + int r; + + res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL); + if (res == NULL) + return -1; + + r = PyObject_IsTrue(res); + Py_DECREF(res); + if (r == -1) + return -1; + + if (r != 1) + return 0; + + Py_CLEAR(self->encoder); + self->encodefunc = NULL; + self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors); + if (self->encoder == NULL) + return -1; + + /* Get the normalized named of the codec */ + res = _PyObject_GetAttrId(codec_info, &PyId_name); + if (res == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + return -1; + } + else if (PyUnicode_Check(res)) { + const encodefuncentry *e = encodefuncs; + while (e->name != NULL) { + if (_PyUnicode_EqualToASCIIString(res, e->name)) { + self->encodefunc = e->encodefunc; + break; + } + e++; + } + } + Py_XDECREF(res); + + return 0; +} + +static int +_textiowrapper_fix_encoder_state(textio *self) +{ + if (!self->seekable || !self->encoder) { + return 0; + } + + self->encoding_start_of_stream = 1; + + PyObject *cookieObj = PyObject_CallMethodObjArgs( + self->buffer, _PyIO_str_tell, NULL); + if (cookieObj == NULL) { + return -1; + } + + int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ); + Py_DECREF(cookieObj); + if (cmp < 0) { + return -1; + } + + if (cmp == 0) { + self->encoding_start_of_stream = 0; + PyObject *res = PyObject_CallMethodObjArgs( + self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL); + if (res == NULL) { + return -1; + } + Py_DECREF(res); + } + + return 0; +} /*[clinic input] _io.TextIOWrapper.__init__ buffer: object encoding: str(accept={str, NoneType}) = NULL - errors: str(accept={str, NoneType}) = NULL + errors: object = None newline: str(accept={str, NoneType}) = NULL line_buffering: bool(accept={int}) = False write_through: bool(accept={int}) = False @@ -835,10 +1022,10 @@ write contains a newline character. static int _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, - const char *encoding, const char *errors, + const char *encoding, PyObject *errors, const char *newline, int line_buffering, int write_through) -/*[clinic end generated code: output=56a83402ce2a8381 input=598d10cc5f2ed7dd]*/ +/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/ { PyObject *raw, *codec_info = NULL; _PyIO_State *state = NULL; @@ -848,12 +1035,20 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, self->ok = 0; self->detached = 0; - if (newline && newline[0] != '\0' - && !(newline[0] == '\n' && newline[1] == '\0') - && !(newline[0] == '\r' && newline[1] == '\0') - && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { - PyErr_Format(PyExc_ValueError, - "illegal newline value: %s", newline); + if (errors == Py_None) { + errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */ + } + else if (!PyUnicode_Check(errors)) { + // Check 'errors' argument here because Argument Clinic doesn't support + // 'str(accept={str, NoneType})' converter. + PyErr_Format( + PyExc_TypeError, + "TextIOWrapper() argument 'errors' must be str or None, not %.50s", + errors->ob_type->tp_name); + return -1; + } + + if (validate_newline(newline) < 0) { return -1; } @@ -955,99 +1150,29 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, * of the partially constructed object (like self->encoding) */ - if (errors == NULL) - errors = "strict"; - self->errors = PyBytes_FromString(errors); - if (self->errors == NULL) - goto error; - + Py_INCREF(errors); + self->errors = errors; self->chunk_size = 8192; - self->readuniversal = (newline == NULL || newline[0] == '\0'); self->line_buffering = line_buffering; self->write_through = write_through; - self->readtranslate = (newline == NULL); - if (newline) { - self->readnl = PyUnicode_FromString(newline); - if (self->readnl == NULL) - goto error; - } - self->writetranslate = (newline == NULL || newline[0] != '\0'); - if (!self->readuniversal && self->readnl) { - self->writenl = PyUnicode_AsUTF8(self->readnl); - if (self->writenl == NULL) - goto error; - if (!strcmp(self->writenl, "\n")) - self->writenl = NULL; + if (set_newline(self, newline) < 0) { + goto error; } -#ifdef MS_WINDOWS - else - self->writenl = "\r\n"; -#endif + + self->buffer = buffer; + Py_INCREF(buffer); /* Build the decoder object */ - res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL); - if (res == NULL) + if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) goto error; - r = PyObject_IsTrue(res); - Py_DECREF(res); - if (r == -1) - goto error; - if (r == 1) { - self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, - errors); - if (self->decoder == NULL) - goto error; - - if (self->readuniversal) { - PyObject *incrementalDecoder = PyObject_CallFunction( - (PyObject *)&PyIncrementalNewlineDecoder_Type, - "Oi", self->decoder, (int)self->readtranslate); - if (incrementalDecoder == NULL) - goto error; - Py_XSETREF(self->decoder, incrementalDecoder); - } - } /* Build the encoder object */ - res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL); - if (res == NULL) - goto error; - r = PyObject_IsTrue(res); - Py_DECREF(res); - if (r == -1) + if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) goto error; - if (r == 1) { - self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, - errors); - if (self->encoder == NULL) - goto error; - /* Get the normalized name of the codec */ - res = _PyObject_GetAttrId(codec_info, &PyId_name); - if (res == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) - PyErr_Clear(); - else - goto error; - } - else if (PyUnicode_Check(res)) { - const encodefuncentry *e = encodefuncs; - while (e->name != NULL) { - if (_PyUnicode_EqualToASCIIString(res, e->name)) { - self->encodefunc = e->encodefunc; - break; - } - e++; - } - } - Py_XDECREF(res); - } /* Finished sorting out the codec details */ Py_CLEAR(codec_info); - self->buffer = buffer; - Py_INCREF(buffer); - if (Py_TYPE(buffer) == &PyBufferedReader_Type || Py_TYPE(buffer) == &PyBufferedWriter_Type || Py_TYPE(buffer) == &PyBufferedRandom_Type) { @@ -1077,30 +1202,8 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1); self->encoding_start_of_stream = 0; - if (self->seekable && self->encoder) { - PyObject *cookieObj; - int cmp; - - self->encoding_start_of_stream = 1; - - cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL); - if (cookieObj == NULL) - goto error; - - cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ); - Py_DECREF(cookieObj); - if (cmp < 0) { - goto error; - } - - if (cmp == 0) { - self->encoding_start_of_stream = 0; - res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, - _PyLong_Zero, NULL); - if (res == NULL) - goto error; - Py_DECREF(res); - } + if (_textiowrapper_fix_encoder_state(self) < 0) { + goto error; } self->ok = 1; @@ -1129,10 +1232,57 @@ convert_optional_bool(PyObject *obj, int default_value) return v != 0; } +static int +textiowrapper_change_encoding(textio *self, PyObject *encoding, + PyObject *errors, int newline_changed) +{ + /* Use existing settings where new settings are not specified */ + if (encoding == Py_None && errors == Py_None && !newline_changed) { + return 0; // no change + } + + if (encoding == Py_None) { + encoding = self->encoding; + if (errors == Py_None) { + errors = self->errors; + } + } + else if (errors == Py_None) { + errors = _PyUnicode_FromId(&PyId_strict); + } + + const char *c_errors = PyUnicode_AsUTF8(errors); + if (c_errors == NULL) { + return -1; + } + + // Create new encoder & decoder + PyObject *codec_info = _PyCodec_LookupTextEncoding( + PyUnicode_AsUTF8(encoding), "codecs.open()"); + if (codec_info == NULL) { + return -1; + } + if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 || + _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) { + Py_DECREF(codec_info); + return -1; + } + Py_DECREF(codec_info); + + Py_INCREF(encoding); + Py_INCREF(errors); + Py_SETREF(self->encoding, encoding); + Py_SETREF(self->errors, errors); + + return _textiowrapper_fix_encoder_state(self); +} /*[clinic input] _io.TextIOWrapper.reconfigure * + encoding: object = None + errors: object = None + newline as newline_obj: object(c_default="NULL") = None line_buffering as line_buffering_obj: object = None write_through as write_through_obj: object = None @@ -1143,14 +1293,31 @@ This also does an implicit stream flush. [clinic start generated code]*/ static PyObject * -_io_TextIOWrapper_reconfigure_impl(textio *self, +_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, + PyObject *errors, PyObject *newline_obj, PyObject *line_buffering_obj, PyObject *write_through_obj) -/*[clinic end generated code: output=7cdf79e7001e2856 input=baade27ecb9db7bc]*/ +/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/ { int line_buffering; int write_through; - PyObject *res; + const char *newline = NULL; + + /* Check if something is in the read buffer */ + if (self->decoded_chars != NULL) { + if (encoding != Py_None || errors != Py_None || newline_obj != NULL) { + _unsupported("It is not possible to set the encoding or newline" + "of stream after the first read"); + return NULL; + } + } + + if (newline_obj != NULL && newline_obj != Py_None) { + newline = PyUnicode_AsUTF8(newline_obj); + if (newline == NULL || validate_newline(newline) < 0) { + return NULL; + } + } line_buffering = convert_optional_bool(line_buffering_obj, self->line_buffering); @@ -1159,11 +1326,23 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, if (line_buffering < 0 || write_through < 0) { return NULL; } - res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL); - Py_XDECREF(res); + + PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); if (res == NULL) { return NULL; } + Py_DECREF(res); + self->b2cratio = 0; + + if (newline_obj != NULL && set_newline(self, newline) < 0) { + return NULL; + } + + if (textiowrapper_change_encoding( + self, encoding, errors, newline_obj != NULL) < 0) { + return NULL; + } + self->line_buffering = line_buffering; self->write_through = write_through; Py_RETURN_NONE; @@ -1565,18 +1744,12 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) nbytes = input_chunk_buf.len; eof = (nbytes == 0); - if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { - decoded_chars = _PyIncrementalNewlineDecoder_decode( - self->decoder, input_chunk, eof); - } - else { - decoded_chars = PyObject_CallMethodObjArgs(self->decoder, - _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); - } - PyBuffer_Release(&input_chunk_buf); - if (check_decoded(decoded_chars) < 0) + decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof); + PyBuffer_Release(&input_chunk_buf); + if (decoded_chars == NULL) goto fail; + textiowrapper_set_decoded_chars(self, decoded_chars); nchars = PyUnicode_GET_LENGTH(decoded_chars); if (nchars > 0) @@ -2851,7 +3024,8 @@ static PyObject * textiowrapper_errors_get(textio *self, void *context) { CHECK_INITIALIZED(self); - return PyUnicode_FromString(PyBytes_AS_STRING(self->errors)); + Py_INCREF(self->errors); + return self->errors; } static PyObject * |