From 507434fd504f3ebc1da72aa77544edc0d73f136e Mon Sep 17 00:00:00 2001
From: INADA Naoki <methane@users.noreply.github.com>
Date: Thu, 21 Dec 2017 09:59:53 +0900
Subject: bpo-15216: io: TextIOWrapper.reconfigure() accepts encoding, errors
 and newline (GH-2343)

---
 Doc/library/io.rst                                 |  18 +-
 Lib/_pyio.py                                       |  76 +++-
 Lib/test/test_io.py                                | 117 ++++++
 .../2017-09-16-02-56-33.bpo-15216.lqXCTT.rst       |   2 +
 Modules/_io/clinic/textio.c.h                      |  25 +-
 Modules/_io/textio.c                               | 454 ++++++++++++++-------
 6 files changed, 517 insertions(+), 175 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2017-09-16-02-56-33.bpo-15216.lqXCTT.rst

diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 6778058..5c71d90 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -904,7 +904,7 @@ Text I/O
       locale encoding using :func:`locale.setlocale`, use the current locale
       encoding instead of the user preferred encoding.
 
-   :class:`TextIOWrapper` provides one attribute in addition to those of
+   :class:`TextIOWrapper` provides these members in addition to those of
    :class:`TextIOBase` and its parents:
 
    .. attribute:: line_buffering
@@ -918,11 +918,19 @@ Text I/O
 
       .. versionadded:: 3.7
 
-   .. method:: reconfigure(*, line_buffering=None, write_through=None)
+   .. method:: reconfigure(*[, encoding][, errors][, newline][, \
+                           line_buffering][, write_through])
 
-      Reconfigure this text stream using new settings for *line_buffering*
-      and *write_through*.  Passing ``None`` as an argument will retain
-      the current setting for that parameter.
+      Reconfigure this text stream using new settings for *encoding*,
+      *errors*, *newline*, *line_buffering* and *write_through*.
+
+      Parameters not specified keep current settings, except
+      ``errors='strict`` is used when *encoding* is specified but
+      *errors* is not specified.
+
+      It is not possible to change the encoding or newline if some data
+      has already been read from the stream. On the other hand, changing
+      encoding after write is possible.
 
       This method does an implicit stream flush before setting the
       new parameters.
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index b59a650..c91a647 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1938,10 +1938,7 @@ class TextIOWrapper(TextIOBase):
     # so that the signature can match the signature of the C version.
     def __init__(self, buffer, encoding=None, errors=None, newline=None,
                  line_buffering=False, write_through=False):
-        if newline is not None and not isinstance(newline, str):
-            raise TypeError("illegal newline type: %r" % (type(newline),))
-        if newline not in (None, "", "\n", "\r", "\r\n"):
-            raise ValueError("illegal newline value: %r" % (newline,))
+        self._check_newline(newline)
         if encoding is None:
             try:
                 encoding = os.device_encoding(buffer.fileno())
@@ -1971,22 +1968,38 @@ class TextIOWrapper(TextIOBase):
                 raise ValueError("invalid errors: %r" % errors)
 
         self._buffer = buffer
+        self._decoded_chars = ''  # buffer for text returned from decoder
+        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
+        self._snapshot = None  # info for reconstructing decoder state
+        self._seekable = self._telling = self.buffer.seekable()
+        self._has_read1 = hasattr(self.buffer, 'read1')
+        self._configure(encoding, errors, newline,
+                        line_buffering, write_through)
+
+    def _check_newline(self, newline):
+        if newline is not None and not isinstance(newline, str):
+            raise TypeError("illegal newline type: %r" % (type(newline),))
+        if newline not in (None, "", "\n", "\r", "\r\n"):
+            raise ValueError("illegal newline value: %r" % (newline,))
+
+    def _configure(self, encoding=None, errors=None, newline=None,
+                   line_buffering=False, write_through=False):
         self._encoding = encoding
         self._errors = errors
+        self._encoder = None
+        self._decoder = None
+        self._b2cratio = 0.0
+
         self._readuniversal = not newline
         self._readtranslate = newline is None
         self._readnl = newline
         self._writetranslate = newline != ''
         self._writenl = newline or os.linesep
-        self._encoder = None
-        self._decoder = None
-        self._decoded_chars = ''  # buffer for text returned from decoder
-        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
-        self._snapshot = None  # info for reconstructing decoder state
-        self._seekable = self._telling = self.buffer.seekable()
-        self._has_read1 = hasattr(self.buffer, 'read1')
-        self._b2cratio = 0.0
 
+        self._line_buffering = line_buffering
+        self._write_through = write_through
+
+        # don't write a BOM in the middle of a file
         if self._seekable and self.writable():
             position = self.buffer.tell()
             if position != 0:
@@ -1996,12 +2009,6 @@ class TextIOWrapper(TextIOBase):
                     # Sometimes the encoder doesn't exist
                     pass
 
-        self._configure(line_buffering, write_through)
-
-    def _configure(self, line_buffering=False, write_through=False):
-        self._line_buffering = line_buffering
-        self._write_through = write_through
-
     # self._snapshot is either None, or a tuple (dec_flags, next_input)
     # where dec_flags is the second (integer) item of the decoder state
     # and next_input is the chunk of input bytes that comes next after the
@@ -2048,17 +2055,46 @@ class TextIOWrapper(TextIOBase):
     def buffer(self):
         return self._buffer
 
-    def reconfigure(self, *, line_buffering=None, write_through=None):
+    def reconfigure(self, *,
+                    encoding=None, errors=None, newline=Ellipsis,
+                    line_buffering=None, write_through=None):
         """Reconfigure the text stream with new parameters.
 
         This also flushes the stream.
         """
+        if (self._decoder is not None
+                and (encoding is not None or errors is not None
+                     or newline is not Ellipsis)):
+            raise UnsupportedOperation(
+                "It is not possible to set the encoding or newline of stream "
+                "after the first read")
+
+        if errors is None:
+            if encoding is None:
+                errors = self._errors
+            else:
+                errors = 'strict'
+        elif not isinstance(errors, str):
+            raise TypeError("invalid errors: %r" % errors)
+
+        if encoding is None:
+            encoding = self._encoding
+        else:
+            if not isinstance(encoding, str):
+                raise TypeError("invalid encoding: %r" % encoding)
+
+        if newline is Ellipsis:
+            newline = self._readnl
+        self._check_newline(newline)
+
         if line_buffering is None:
             line_buffering = self.line_buffering
         if write_through is None:
             write_through = self.write_through
+
         self.flush()
-        self._configure(line_buffering, write_through)
+        self._configure(encoding, errors, newline,
+                        line_buffering, write_through)
 
     def seekable(self):
         if self.closed:
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 9bfe4b0..3aee5f1 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -3408,6 +3408,123 @@ class TextIOWrapperTest(unittest.TestCase):
         F.tell = lambda x: 0
         t = self.TextIOWrapper(F(), encoding='utf-8')
 
+    def test_reconfigure_encoding_read(self):
+        # latin1 -> utf8
+        # (latin1 can decode utf-8 encoded string)
+        data = 'abc\xe9\n'.encode('latin1') + 'd\xe9f\n'.encode('utf8')
+        raw = self.BytesIO(data)
+        txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n')
+        self.assertEqual(txt.readline(), 'abc\xe9\n')
+        with self.assertRaises(self.UnsupportedOperation):
+            txt.reconfigure(encoding='utf-8')
+        with self.assertRaises(self.UnsupportedOperation):
+            txt.reconfigure(newline=None)
+
+    def test_reconfigure_write_fromascii(self):
+        # ascii has a specific encodefunc in the C implementation,
+        # but utf-8-sig has not. Make sure that we get rid of the
+        # cached encodefunc when we switch encoders.
+        raw = self.BytesIO()
+        txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
+        txt.write('foo\n')
+        txt.reconfigure(encoding='utf-8-sig')
+        txt.write('\xe9\n')
+        txt.flush()
+        self.assertEqual(raw.getvalue(), b'foo\n\xc3\xa9\n')
+
+    def test_reconfigure_write(self):
+        # latin -> utf8
+        raw = self.BytesIO()
+        txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n')
+        txt.write('abc\xe9\n')
+        txt.reconfigure(encoding='utf-8')
+        self.assertEqual(raw.getvalue(), b'abc\xe9\n')
+        txt.write('d\xe9f\n')
+        txt.flush()
+        self.assertEqual(raw.getvalue(), b'abc\xe9\nd\xc3\xa9f\n')
+
+        # ascii -> utf-8-sig: ensure that no BOM is written in the middle of
+        # the file
+        raw = self.BytesIO()
+        txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
+        txt.write('abc\n')
+        txt.reconfigure(encoding='utf-8-sig')
+        txt.write('d\xe9f\n')
+        txt.flush()
+        self.assertEqual(raw.getvalue(), b'abc\nd\xc3\xa9f\n')
+
+    def test_reconfigure_write_non_seekable(self):
+        raw = self.BytesIO()
+        raw.seekable = lambda: False
+        raw.seek = None
+        txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
+        txt.write('abc\n')
+        txt.reconfigure(encoding='utf-8-sig')
+        txt.write('d\xe9f\n')
+        txt.flush()
+
+        # If the raw stream is not seekable, there'll be a BOM
+        self.assertEqual(raw.getvalue(),  b'abc\n\xef\xbb\xbfd\xc3\xa9f\n')
+
+    def test_reconfigure_defaults(self):
+        txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\n')
+        txt.reconfigure(encoding=None)
+        self.assertEqual(txt.encoding, 'ascii')
+        self.assertEqual(txt.errors, 'replace')
+        txt.write('LF\n')
+
+        txt.reconfigure(newline='\r\n')
+        self.assertEqual(txt.encoding, 'ascii')
+        self.assertEqual(txt.errors, 'replace')
+
+        txt.reconfigure(errors='ignore')
+        self.assertEqual(txt.encoding, 'ascii')
+        self.assertEqual(txt.errors, 'ignore')
+        txt.write('CRLF\n')
+
+        txt.reconfigure(encoding='utf-8', newline=None)
+        self.assertEqual(txt.errors, 'strict')
+        txt.seek(0)
+        self.assertEqual(txt.read(), 'LF\nCRLF\n')
+
+        self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n')
+
+    def test_reconfigure_newline(self):
+        raw = self.BytesIO(b'CR\rEOF')
+        txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
+        txt.reconfigure(newline=None)
+        self.assertEqual(txt.readline(), 'CR\n')
+        raw = self.BytesIO(b'CR\rEOF')
+        txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
+        txt.reconfigure(newline='')
+        self.assertEqual(txt.readline(), 'CR\r')
+        raw = self.BytesIO(b'CR\rLF\nEOF')
+        txt = self.TextIOWrapper(raw, 'ascii', newline='\r')
+        txt.reconfigure(newline='\n')
+        self.assertEqual(txt.readline(), 'CR\rLF\n')
+        raw = self.BytesIO(b'LF\nCR\rEOF')
+        txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
+        txt.reconfigure(newline='\r')
+        self.assertEqual(txt.readline(), 'LF\nCR\r')
+        raw = self.BytesIO(b'CR\rCRLF\r\nEOF')
+        txt = self.TextIOWrapper(raw, 'ascii', newline='\r')
+        txt.reconfigure(newline='\r\n')
+        self.assertEqual(txt.readline(), 'CR\rCRLF\r\n')
+
+        txt = self.TextIOWrapper(self.BytesIO(), 'ascii', newline='\r')
+        txt.reconfigure(newline=None)
+        txt.write('linesep\n')
+        txt.reconfigure(newline='')
+        txt.write('LF\n')
+        txt.reconfigure(newline='\n')
+        txt.write('LF\n')
+        txt.reconfigure(newline='\r')
+        txt.write('CR\n')
+        txt.reconfigure(newline='\r\n')
+        txt.write('CRLF\n')
+        expected = 'linesep' + os.linesep + 'LF\nLF\nCR\rCRLF\r\n'
+        self.assertEqual(txt.detach().getvalue().decode('ascii'), expected)
+
 
 class MemviewBytesIO(io.BytesIO):
     '''A BytesIO object whose read method returns memoryviews
diff --git a/Misc/NEWS.d/next/Library/2017-09-16-02-56-33.bpo-15216.lqXCTT.rst b/Misc/NEWS.d/next/Library/2017-09-16-02-56-33.bpo-15216.lqXCTT.rst
new file mode 100644
index 0000000..0e9fd55
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-09-16-02-56-33.bpo-15216.lqXCTT.rst
@@ -0,0 +1,2 @@
+``TextIOWrapper.reconfigure()`` supports changing *encoding*, *errors*, and
+*newline*.
diff --git a/Modules/_io/clinic/textio.c.h b/Modules/_io/clinic/textio.c.h
index 53ac0de..60f5dab 100644
--- a/Modules/_io/clinic/textio.c.h
+++ b/Modules/_io/clinic/textio.c.h
@@ -149,7 +149,7 @@ PyDoc_STRVAR(_io_TextIOWrapper___init____doc__,
 
 static int
 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
-                                const char *encoding, const char *errors,
+                                const char *encoding, PyObject *errors,
                                 const char *newline, int line_buffering,
                                 int write_through);
 
@@ -158,10 +158,10 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 {
     int return_value = -1;
     static const char * const _keywords[] = {"buffer", "encoding", "errors", "newline", "line_buffering", "write_through", NULL};
-    static _PyArg_Parser _parser = {"O|zzzii:TextIOWrapper", _keywords, 0};
+    static _PyArg_Parser _parser = {"O|zOzii:TextIOWrapper", _keywords, 0};
     PyObject *buffer;
     const char *encoding = NULL;
-    const char *errors = NULL;
+    PyObject *errors = Py_None;
     const char *newline = NULL;
     int line_buffering = 0;
     int write_through = 0;
@@ -177,7 +177,8 @@ exit:
 }
 
 PyDoc_STRVAR(_io_TextIOWrapper_reconfigure__doc__,
-"reconfigure($self, /, *, line_buffering=None, write_through=None)\n"
+"reconfigure($self, /, *, encoding=None, errors=None, newline=None,\n"
+"            line_buffering=None, write_through=None)\n"
 "--\n"
 "\n"
 "Reconfigure the text stream with new parameters.\n"
@@ -188,7 +189,8 @@ PyDoc_STRVAR(_io_TextIOWrapper_reconfigure__doc__,
     {"reconfigure", (PyCFunction)_io_TextIOWrapper_reconfigure, METH_FASTCALL|METH_KEYWORDS, _io_TextIOWrapper_reconfigure__doc__},
 
 static PyObject *
-_io_TextIOWrapper_reconfigure_impl(textio *self,
+_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
+                                   PyObject *errors, PyObject *newline_obj,
                                    PyObject *line_buffering_obj,
                                    PyObject *write_through_obj);
 
@@ -196,16 +198,19 @@ static PyObject *
 _io_TextIOWrapper_reconfigure(textio *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
-    static const char * const _keywords[] = {"line_buffering", "write_through", NULL};
-    static _PyArg_Parser _parser = {"|$OO:reconfigure", _keywords, 0};
+    static const char * const _keywords[] = {"encoding", "errors", "newline", "line_buffering", "write_through", NULL};
+    static _PyArg_Parser _parser = {"|$OOOOO:reconfigure", _keywords, 0};
+    PyObject *encoding = Py_None;
+    PyObject *errors = Py_None;
+    PyObject *newline_obj = NULL;
     PyObject *line_buffering_obj = Py_None;
     PyObject *write_through_obj = Py_None;
 
     if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
-        &line_buffering_obj, &write_through_obj)) {
+        &encoding, &errors, &newline_obj, &line_buffering_obj, &write_through_obj)) {
         goto exit;
     }
-    return_value = _io_TextIOWrapper_reconfigure_impl(self, line_buffering_obj, write_through_obj);
+    return_value = _io_TextIOWrapper_reconfigure_impl(self, encoding, errors, newline_obj, line_buffering_obj, write_through_obj);
 
 exit:
     return return_value;
@@ -499,4 +504,4 @@ _io_TextIOWrapper_close(textio *self, PyObject *Py_UNUSED(ignored))
 {
     return _io_TextIOWrapper_close_impl(self);
 }
-/*[clinic end generated code: output=679b3ac5284df4e0 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b5be870b0039d577 input=a9049054013a1b77]*/
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 5239e85..6800d2d 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -36,6 +36,7 @@ _Py_IDENTIFIER(reset);
 _Py_IDENTIFIER(seek);
 _Py_IDENTIFIER(seekable);
 _Py_IDENTIFIER(setstate);
+_Py_IDENTIFIER(strict);
 _Py_IDENTIFIER(tell);
 _Py_IDENTIFIER(writable);
 
@@ -252,14 +253,14 @@ _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
     Py_INCREF(decoder);
 
     if (errors == NULL) {
-        self->errors = PyUnicode_FromString("strict");
+        self->errors = _PyUnicode_FromId(&PyId_strict);
         if (self->errors == NULL)
             return -1;
     }
     else {
-        Py_INCREF(errors);
         self->errors = errors;
     }
+    Py_INCREF(self->errors);
 
     self->translate = translate;
     self->seennl = 0;
@@ -647,7 +648,7 @@ typedef struct
     PyObject *decoder;
     PyObject *readnl;
     PyObject *errors;
-    const char *writenl; /* utf-8 encoded, NULL stands for \n */
+    const char *writenl; /* ASCII-encoded; NULL stands for \n */
     char line_buffering;
     char write_through;
     char readuniversal;
@@ -700,21 +701,21 @@ typedef struct
 static PyObject *
 ascii_encode(textio *self, PyObject *text)
 {
-    return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
+    return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
 }
 
 static PyObject *
 utf16be_encode(textio *self, PyObject *text)
 {
     return _PyUnicode_EncodeUTF16(text,
-                                  PyBytes_AS_STRING(self->errors), 1);
+                                  PyUnicode_AsUTF8(self->errors), 1);
 }
 
 static PyObject *
 utf16le_encode(textio *self, PyObject *text)
 {
     return _PyUnicode_EncodeUTF16(text,
-                                  PyBytes_AS_STRING(self->errors), -1);
+                                  PyUnicode_AsUTF8(self->errors), -1);
 }
 
 static PyObject *
@@ -729,21 +730,21 @@ utf16_encode(textio *self, PyObject *text)
 #endif
     }
     return _PyUnicode_EncodeUTF16(text,
-                                  PyBytes_AS_STRING(self->errors), 0);
+                                  PyUnicode_AsUTF8(self->errors), 0);
 }
 
 static PyObject *
 utf32be_encode(textio *self, PyObject *text)
 {
     return _PyUnicode_EncodeUTF32(text,
-                                  PyBytes_AS_STRING(self->errors), 1);
+                                  PyUnicode_AsUTF8(self->errors), 1);
 }
 
 static PyObject *
 utf32le_encode(textio *self, PyObject *text)
 {
     return _PyUnicode_EncodeUTF32(text,
-                                  PyBytes_AS_STRING(self->errors), -1);
+                                  PyUnicode_AsUTF8(self->errors), -1);
 }
 
 static PyObject *
@@ -758,19 +759,19 @@ utf32_encode(textio *self, PyObject *text)
 #endif
     }
     return _PyUnicode_EncodeUTF32(text,
-                                  PyBytes_AS_STRING(self->errors), 0);
+                                  PyUnicode_AsUTF8(self->errors), 0);
 }
 
 static PyObject *
 utf8_encode(textio *self, PyObject *text)
 {
-    return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
+    return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
 }
 
 static PyObject *
 latin1_encode(textio *self, PyObject *text)
 {
-    return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
+    return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
 }
 
 /* Map normalized encoding names onto the specialized encoding funcs */
@@ -793,12 +794,198 @@ static const encodefuncentry encodefuncs[] = {
     {NULL, NULL}
 };
 
+static int
+validate_newline(const char *newline)
+{
+    if (newline && newline[0] != '\0'
+        && !(newline[0] == '\n' && newline[1] == '\0')
+        && !(newline[0] == '\r' && newline[1] == '\0')
+        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
+        PyErr_Format(PyExc_ValueError,
+                     "illegal newline value: %s", newline);
+        return -1;
+    }
+    return 0;
+}
+
+static int
+set_newline(textio *self, const char *newline)
+{
+    PyObject *old = self->readnl;
+    if (newline == NULL) {
+        self->readnl = NULL;
+    }
+    else {
+        self->readnl = PyUnicode_FromString(newline);
+        if (self->readnl == NULL) {
+            self->readnl = old;
+            return -1;
+        }
+    }
+    self->readuniversal = (newline == NULL || newline[0] == '\0');
+    self->readtranslate = (newline == NULL);
+    self->writetranslate = (newline == NULL || newline[0] != '\0');
+    if (!self->readuniversal && self->readnl != NULL) {
+        // validate_newline() accepts only ASCII newlines.
+        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
+        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
+        if (strcmp(self->writenl, "\n") == 0) {
+            self->writenl = NULL;
+        }
+    }
+    else {
+#ifdef MS_WINDOWS
+        self->writenl = "\r\n";
+#else
+        self->writenl = NULL;
+#endif
+    }
+    Py_XDECREF(old);
+    return 0;
+}
+
+static int
+_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
+                           const char *errors)
+{
+    PyObject *res;
+    int r;
+
+    res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
+    if (res == NULL)
+        return -1;
+
+    r = PyObject_IsTrue(res);
+    Py_DECREF(res);
+    if (r == -1)
+        return -1;
+
+    if (r != 1)
+        return 0;
+
+    Py_CLEAR(self->decoder);
+    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
+    if (self->decoder == NULL)
+        return -1;
+
+    if (self->readuniversal) {
+        PyObject *incrementalDecoder = PyObject_CallFunction(
+            (PyObject *)&PyIncrementalNewlineDecoder_Type,
+            "Oi", self->decoder, (int)self->readtranslate);
+        if (incrementalDecoder == NULL)
+            return -1;
+        Py_CLEAR(self->decoder);
+        self->decoder = incrementalDecoder;
+    }
+
+    return 0;
+}
+
+static PyObject*
+_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
+{
+    PyObject *chars;
+
+    if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
+        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
+    else
+        chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
+                                           eof ? Py_True : Py_False, NULL);
+
+    if (check_decoded(chars) < 0)
+        // check_decoded already decreases refcount
+        return NULL;
+
+    return chars;
+}
+
+static int
+_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
+                           const char *errors)
+{
+    PyObject *res;
+    int r;
+
+    res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
+    if (res == NULL)
+        return -1;
+
+    r = PyObject_IsTrue(res);
+    Py_DECREF(res);
+    if (r == -1)
+        return -1;
+
+    if (r != 1)
+        return 0;
+
+    Py_CLEAR(self->encoder);
+    self->encodefunc = NULL;
+    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
+    if (self->encoder == NULL)
+        return -1;
+
+    /* Get the normalized named of the codec */
+    res = _PyObject_GetAttrId(codec_info, &PyId_name);
+    if (res == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_AttributeError))
+            PyErr_Clear();
+        else
+            return -1;
+    }
+    else if (PyUnicode_Check(res)) {
+        const encodefuncentry *e = encodefuncs;
+        while (e->name != NULL) {
+            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
+                self->encodefunc = e->encodefunc;
+                break;
+            }
+            e++;
+        }
+    }
+    Py_XDECREF(res);
+
+    return 0;
+}
+
+static int
+_textiowrapper_fix_encoder_state(textio *self)
+{
+    if (!self->seekable || !self->encoder) {
+        return 0;
+    }
+
+    self->encoding_start_of_stream = 1;
+
+    PyObject *cookieObj = PyObject_CallMethodObjArgs(
+        self->buffer, _PyIO_str_tell, NULL);
+    if (cookieObj == NULL) {
+        return -1;
+    }
+
+    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
+    Py_DECREF(cookieObj);
+    if (cmp < 0) {
+        return -1;
+    }
+
+    if (cmp == 0) {
+        self->encoding_start_of_stream = 0;
+        PyObject *res = PyObject_CallMethodObjArgs(
+            self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
+        if (res == NULL) {
+            return -1;
+        }
+        Py_DECREF(res);
+    }
+
+    return 0;
+}
 
 /*[clinic input]
 _io.TextIOWrapper.__init__
     buffer: object
     encoding: str(accept={str, NoneType}) = NULL
-    errors: str(accept={str, NoneType}) = NULL
+    errors: object = None
     newline: str(accept={str, NoneType}) = NULL
     line_buffering: bool(accept={int}) = False
     write_through: bool(accept={int}) = False
@@ -835,10 +1022,10 @@ write contains a newline character.
 
 static int
 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
-                                const char *encoding, const char *errors,
+                                const char *encoding, PyObject *errors,
                                 const char *newline, int line_buffering,
                                 int write_through)
-/*[clinic end generated code: output=56a83402ce2a8381 input=598d10cc5f2ed7dd]*/
+/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
 {
     PyObject *raw, *codec_info = NULL;
     _PyIO_State *state = NULL;
@@ -848,12 +1035,20 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
     self->ok = 0;
     self->detached = 0;
 
-    if (newline && newline[0] != '\0'
-        && !(newline[0] == '\n' && newline[1] == '\0')
-        && !(newline[0] == '\r' && newline[1] == '\0')
-        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
-        PyErr_Format(PyExc_ValueError,
-                     "illegal newline value: %s", newline);
+    if (errors == Py_None) {
+        errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
+    }
+    else if (!PyUnicode_Check(errors)) {
+        // Check 'errors' argument here because Argument Clinic doesn't support
+        // 'str(accept={str, NoneType})' converter.
+        PyErr_Format(
+            PyExc_TypeError,
+            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
+            errors->ob_type->tp_name);
+        return -1;
+    }
+
+    if (validate_newline(newline) < 0) {
         return -1;
     }
 
@@ -955,99 +1150,29 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
      * of the partially constructed object (like self->encoding)
      */
 
-    if (errors == NULL)
-        errors = "strict";
-    self->errors = PyBytes_FromString(errors);
-    if (self->errors == NULL)
-        goto error;
-
+    Py_INCREF(errors);
+    self->errors = errors;
     self->chunk_size = 8192;
-    self->readuniversal = (newline == NULL || newline[0] == '\0');
     self->line_buffering = line_buffering;
     self->write_through = write_through;
-    self->readtranslate = (newline == NULL);
-    if (newline) {
-        self->readnl = PyUnicode_FromString(newline);
-        if (self->readnl == NULL)
-            goto error;
-    }
-    self->writetranslate = (newline == NULL || newline[0] != '\0');
-    if (!self->readuniversal && self->readnl) {
-        self->writenl = PyUnicode_AsUTF8(self->readnl);
-        if (self->writenl == NULL)
-            goto error;
-        if (!strcmp(self->writenl, "\n"))
-            self->writenl = NULL;
+    if (set_newline(self, newline) < 0) {
+        goto error;
     }
-#ifdef MS_WINDOWS
-    else
-        self->writenl = "\r\n";
-#endif
+
+    self->buffer = buffer;
+    Py_INCREF(buffer);
 
     /* Build the decoder object */
-    res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
-    if (res == NULL)
+    if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
         goto error;
-    r = PyObject_IsTrue(res);
-    Py_DECREF(res);
-    if (r == -1)
-        goto error;
-    if (r == 1) {
-        self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
-                                                           errors);
-        if (self->decoder == NULL)
-            goto error;
-
-        if (self->readuniversal) {
-            PyObject *incrementalDecoder = PyObject_CallFunction(
-                (PyObject *)&PyIncrementalNewlineDecoder_Type,
-                "Oi", self->decoder, (int)self->readtranslate);
-            if (incrementalDecoder == NULL)
-                goto error;
-            Py_XSETREF(self->decoder, incrementalDecoder);
-        }
-    }
 
     /* Build the encoder object */
-    res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
-    if (res == NULL)
-        goto error;
-    r = PyObject_IsTrue(res);
-    Py_DECREF(res);
-    if (r == -1)
+    if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
         goto error;
-    if (r == 1) {
-        self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
-                                                           errors);
-        if (self->encoder == NULL)
-            goto error;
-        /* Get the normalized name of the codec */
-        res = _PyObject_GetAttrId(codec_info, &PyId_name);
-        if (res == NULL) {
-            if (PyErr_ExceptionMatches(PyExc_AttributeError))
-                PyErr_Clear();
-            else
-                goto error;
-        }
-        else if (PyUnicode_Check(res)) {
-            const encodefuncentry *e = encodefuncs;
-            while (e->name != NULL) {
-                if (_PyUnicode_EqualToASCIIString(res, e->name)) {
-                    self->encodefunc = e->encodefunc;
-                    break;
-                }
-                e++;
-            }
-        }
-        Py_XDECREF(res);
-    }
 
     /* Finished sorting out the codec details */
     Py_CLEAR(codec_info);
 
-    self->buffer = buffer;
-    Py_INCREF(buffer);
-
     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
@@ -1077,30 +1202,8 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
     self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
 
     self->encoding_start_of_stream = 0;
-    if (self->seekable && self->encoder) {
-        PyObject *cookieObj;
-        int cmp;
-
-        self->encoding_start_of_stream = 1;
-
-        cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
-        if (cookieObj == NULL)
-            goto error;
-
-        cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
-        Py_DECREF(cookieObj);
-        if (cmp < 0) {
-            goto error;
-        }
-
-        if (cmp == 0) {
-            self->encoding_start_of_stream = 0;
-            res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
-                                             _PyLong_Zero, NULL);
-            if (res == NULL)
-                goto error;
-            Py_DECREF(res);
-        }
+    if (_textiowrapper_fix_encoder_state(self) < 0) {
+        goto error;
     }
 
     self->ok = 1;
@@ -1129,10 +1232,57 @@ convert_optional_bool(PyObject *obj, int default_value)
     return v != 0;
 }
 
+static int
+textiowrapper_change_encoding(textio *self, PyObject *encoding,
+                              PyObject *errors, int newline_changed)
+{
+    /* Use existing settings where new settings are not specified */
+    if (encoding == Py_None && errors == Py_None && !newline_changed) {
+        return 0;  // no change
+    }
+
+    if (encoding == Py_None) {
+        encoding = self->encoding;
+        if (errors == Py_None) {
+            errors = self->errors;
+        }
+    }
+    else if (errors == Py_None) {
+        errors = _PyUnicode_FromId(&PyId_strict);
+    }
+
+    const char *c_errors = PyUnicode_AsUTF8(errors);
+    if (c_errors == NULL) {
+        return -1;
+    }
+
+    // Create new encoder & decoder
+    PyObject *codec_info = _PyCodec_LookupTextEncoding(
+        PyUnicode_AsUTF8(encoding), "codecs.open()");
+    if (codec_info == NULL) {
+        return -1;
+    }
+    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
+            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
+        Py_DECREF(codec_info);
+        return -1;
+    }
+    Py_DECREF(codec_info);
+
+    Py_INCREF(encoding);
+    Py_INCREF(errors);
+    Py_SETREF(self->encoding, encoding);
+    Py_SETREF(self->errors, errors);
+
+    return _textiowrapper_fix_encoder_state(self);
+}
 
 /*[clinic input]
 _io.TextIOWrapper.reconfigure
     *
+    encoding: object = None
+    errors: object = None
+    newline as newline_obj: object(c_default="NULL") = None
     line_buffering as line_buffering_obj: object = None
     write_through as write_through_obj: object = None
 
@@ -1143,14 +1293,31 @@ This also does an implicit stream flush.
 [clinic start generated code]*/
 
 static PyObject *
-_io_TextIOWrapper_reconfigure_impl(textio *self,
+_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
+                                   PyObject *errors, PyObject *newline_obj,
                                    PyObject *line_buffering_obj,
                                    PyObject *write_through_obj)
-/*[clinic end generated code: output=7cdf79e7001e2856 input=baade27ecb9db7bc]*/
+/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
 {
     int line_buffering;
     int write_through;
-    PyObject *res;
+    const char *newline = NULL;
+
+    /* Check if something is in the read buffer */
+    if (self->decoded_chars != NULL) {
+        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
+            _unsupported("It is not possible to set the encoding or newline"
+                         "of stream after the first read");
+            return NULL;
+        }
+    }
+
+    if (newline_obj != NULL && newline_obj != Py_None) {
+        newline = PyUnicode_AsUTF8(newline_obj);
+        if (newline == NULL || validate_newline(newline) < 0) {
+            return NULL;
+        }
+    }
 
     line_buffering = convert_optional_bool(line_buffering_obj,
                                            self->line_buffering);
@@ -1159,11 +1326,23 @@ _io_TextIOWrapper_reconfigure_impl(textio *self,
     if (line_buffering < 0 || write_through < 0) {
         return NULL;
     }
-    res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
-    Py_XDECREF(res);
+
+    PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
     if (res == NULL) {
         return NULL;
     }
+    Py_DECREF(res);
+    self->b2cratio = 0;
+
+    if (newline_obj != NULL && set_newline(self, newline) < 0) {
+        return NULL;
+    }
+
+    if (textiowrapper_change_encoding(
+            self, encoding, errors, newline_obj != NULL) < 0) {
+        return NULL;
+    }
+
     self->line_buffering = line_buffering;
     self->write_through = write_through;
     Py_RETURN_NONE;
@@ -1565,18 +1744,12 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
 
     nbytes = input_chunk_buf.len;
     eof = (nbytes == 0);
-    if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
-        decoded_chars = _PyIncrementalNewlineDecoder_decode(
-            self->decoder, input_chunk, eof);
-    }
-    else {
-        decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
-            _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
-    }
-    PyBuffer_Release(&input_chunk_buf);
 
-    if (check_decoded(decoded_chars) < 0)
+    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
+    PyBuffer_Release(&input_chunk_buf);
+    if (decoded_chars == NULL)
         goto fail;
+
     textiowrapper_set_decoded_chars(self, decoded_chars);
     nchars = PyUnicode_GET_LENGTH(decoded_chars);
     if (nchars > 0)
@@ -2851,7 +3024,8 @@ static PyObject *
 textiowrapper_errors_get(textio *self, void *context)
 {
     CHECK_INITIALIZED(self);
-    return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
+    Py_INCREF(self->errors);
+    return self->errors;
 }
 
 static PyObject *
-- 
cgit v0.12