summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/codecs.h20
-rw-r--r--Lib/_pyio.py5
-rw-r--r--Lib/test/test_io.py33
-rw-r--r--Misc/NEWS6
-rw-r--r--Modules/_io/textio.c34
-rw-r--r--Python/codecs.c84
6 files changed, 141 insertions, 41 deletions
diff --git a/Include/codecs.h b/Include/codecs.h
index 8f0014e..611964c 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -104,7 +104,14 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode(
Please note that these APIs are internal and should not
be used in Python C extensions.
+ XXX (ncoghlan): should we make these, or something like them, public
+ in Python 3.5+?
+
*/
+PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding(
+ const char *encoding,
+ const char *alternate_command
+ );
PyAPI_FUNC(PyObject *) _PyCodec_EncodeText(
PyObject *object,
@@ -117,6 +124,19 @@ PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
const char *encoding,
const char *errors
);
+
+/* These two aren't actually text encoding specific, but _io.TextIOWrapper
+ * is the only current API consumer.
+ */
+PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder(
+ PyObject *codec_info,
+ const char *errors
+ );
+
+PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder(
+ PyObject *codec_info,
+ const char *errors
+ );
#endif
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 3961969..b04d23a 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1503,6 +1503,11 @@ class TextIOWrapper(TextIOBase):
if not isinstance(encoding, str):
raise ValueError("invalid encoding: %r" % encoding)
+ if not codecs.lookup(encoding)._is_text_encoding:
+ msg = ("%r is not a text encoding; "
+ "use codecs.open() to handle arbitrary codecs")
+ raise LookupError(msg % encoding)
+
if errors is None:
errors = "strict"
else:
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 355a33e..d5b274c 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -1929,6 +1929,15 @@ class TextIOWrapperTest(unittest.TestCase):
self.assertRaises(TypeError, t.__init__, b, newline=42)
self.assertRaises(ValueError, t.__init__, b, newline='xyzzy')
+ def test_non_text_encoding_codecs_are_rejected(self):
+ # Ensure the constructor complains if passed a codec that isn't
+ # marked as a text encoding
+ # http://bugs.python.org/issue20404
+ r = self.BytesIO()
+ b = self.BufferedWriter(r)
+ with self.assertRaisesRegex(LookupError, "is not a text encoding"):
+ self.TextIOWrapper(b, encoding="hex")
+
def test_detach(self):
r = self.BytesIO()
b = self.BufferedWriter(r)
@@ -2579,15 +2588,22 @@ class TextIOWrapperTest(unittest.TestCase):
def test_illegal_decoder(self):
# Issue #17106
+ # Bypass the early encoding check added in issue 20404
+ def _make_illegal_wrapper():
+ quopri = codecs.lookup("quopri")
+ quopri._is_text_encoding = True
+ try:
+ t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'),
+ newline='\n', encoding="quopri")
+ finally:
+ quopri._is_text_encoding = False
+ return t
# Crash when decoder returns non-string
- t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
- encoding='quopri_codec')
+ t = _make_illegal_wrapper()
self.assertRaises(TypeError, t.read, 1)
- t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
- encoding='quopri_codec')
+ t = _make_illegal_wrapper()
self.assertRaises(TypeError, t.readline)
- t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
- encoding='quopri_codec')
+ t = _make_illegal_wrapper()
self.assertRaises(TypeError, t.read)
def _check_create_at_shutdown(self, **kwargs):
@@ -2616,8 +2632,7 @@ class TextIOWrapperTest(unittest.TestCase):
if err:
# Can error out with a RuntimeError if the module state
# isn't found.
- self.assertIn("RuntimeError: could not find io module state",
- err.decode())
+ self.assertIn(self.shutdown_error, err.decode())
else:
self.assertEqual("ok", out.decode().strip())
@@ -2630,6 +2645,7 @@ class TextIOWrapperTest(unittest.TestCase):
class CTextIOWrapperTest(TextIOWrapperTest):
io = io
+ shutdown_error = "RuntimeError: could not find io module state"
def test_initialization(self):
r = self.BytesIO(b"\xc3\xa9\n\n")
@@ -2674,6 +2690,7 @@ class CTextIOWrapperTest(TextIOWrapperTest):
class PyTextIOWrapperTest(TextIOWrapperTest):
io = pyio
+ shutdown_error = "LookupError: unknown encoding: ascii"
class IncrementalNewlineDecoderTest(unittest.TestCase):
diff --git a/Misc/NEWS b/Misc/NEWS
index 9938367..91e3b53 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,12 @@ Release date: 2014-02-09
Core and Builtins
-----------------
+- Issue #20404: io.TextIOWrapper (and hence the open() builtin) now uses the
+ internal codec marking system added for issue #19619 to throw LookupError
+ for known non-text encodings at stream construction time. The existing
+ output type checks remain in place to deal with unmarked third party
+ codecs.
+
- Issue #17162: Add PyType_GetSlot.
- Issue #20162: Fix an alignment issue in the siphash24() hash function which
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 747f623..5739bc4 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -849,7 +849,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
char *kwlist[] = {"buffer", "encoding", "errors",
"newline", "line_buffering", "write_through",
NULL};
- PyObject *buffer, *raw;
+ PyObject *buffer, *raw, *codec_info = NULL;
char *encoding = NULL;
char *errors = NULL;
char *newline = NULL;
@@ -961,6 +961,17 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
"could not determine default encoding");
}
+ /* Check we have been asked for a real text encoding */
+ codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
+ if (codec_info == NULL) {
+ Py_CLEAR(self->encoding);
+ goto error;
+ }
+
+ /* XXX: Failures beyond this point have the potential to leak elements
+ * of the partially constructed object (like self->encoding)
+ */
+
if (errors == NULL)
errors = "strict";
self->errors = PyBytes_FromString(errors);
@@ -975,7 +986,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if (newline) {
self->readnl = PyUnicode_FromString(newline);
if (self->readnl == NULL)
- return -1;
+ goto error;
}
self->writetranslate = (newline == NULL || newline[0] != '\0');
if (!self->readuniversal && self->readnl) {
@@ -999,8 +1010,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if (r == -1)
goto error;
if (r == 1) {
- self->decoder = PyCodec_IncrementalDecoder(
- encoding, errors);
+ self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
+ errors);
if (self->decoder == NULL)
goto error;
@@ -1024,17 +1035,12 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if (r == -1)
goto error;
if (r == 1) {
- PyObject *ci;
- self->encoder = PyCodec_IncrementalEncoder(
- encoding, errors);
+ self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
+ errors);
if (self->encoder == NULL)
goto error;
/* Get the normalized named of the codec */
- ci = _PyCodec_Lookup(encoding);
- if (ci == NULL)
- goto error;
- res = _PyObject_GetAttrId(ci, &PyId_name);
- Py_DECREF(ci);
+ res = _PyObject_GetAttrId(codec_info, &PyId_name);
if (res == NULL) {
if (PyErr_ExceptionMatches(PyExc_AttributeError))
PyErr_Clear();
@@ -1054,6 +1060,9 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Py_XDECREF(res);
}
+ /* Finished sorting out the codec details */
+ Py_DECREF(codec_info);
+
self->buffer = buffer;
Py_INCREF(buffer);
@@ -1116,6 +1125,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
return 0;
error:
+ Py_XDECREF(codec_info);
return -1;
}
diff --git a/Python/codecs.c b/Python/codecs.c
index 5ff41b5..e06d6e0 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -243,20 +243,15 @@ PyObject *codec_getitem(const char *encoding, int index)
return v;
}
-/* Helper function to create an incremental codec. */
-
+/* Helper functions to create an incremental codec. */
static
-PyObject *codec_getincrementalcodec(const char *encoding,
- const char *errors,
- const char *attrname)
+PyObject *codec_makeincrementalcodec(PyObject *codec_info,
+ const char *errors,
+ const char *attrname)
{
- PyObject *codecs, *ret, *inccodec;
+ PyObject *ret, *inccodec;
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- return NULL;
- inccodec = PyObject_GetAttrString(codecs, attrname);
- Py_DECREF(codecs);
+ inccodec = PyObject_GetAttrString(codec_info, attrname);
if (inccodec == NULL)
return NULL;
if (errors)
@@ -267,6 +262,21 @@ PyObject *codec_getincrementalcodec(const char *encoding,
return ret;
}
+static
+PyObject *codec_getincrementalcodec(const char *encoding,
+ const char *errors,
+ const char *attrname)
+{
+ PyObject *codec_info, *ret;
+
+ codec_info = _PyCodec_Lookup(encoding);
+ if (codec_info == NULL)
+ return NULL;
+ ret = codec_makeincrementalcodec(codec_info, errors, attrname);
+ Py_DECREF(codec_info);
+ return ret;
+}
+
/* Helper function to create a stream codec. */
static
@@ -290,6 +300,24 @@ PyObject *codec_getstreamcodec(const char *encoding,
return streamcodec;
}
+/* Helpers to work with the result of _PyCodec_Lookup
+
+ */
+PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
+ const char *errors)
+{
+ return codec_makeincrementalcodec(codec_info, errors,
+ "incrementaldecoder");
+}
+
+PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
+ const char *errors)
+{
+ return codec_makeincrementalcodec(codec_info, errors,
+ "incrementalencoder");
+}
+
+
/* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount.
@@ -467,15 +495,12 @@ PyObject *PyCodec_Decode(PyObject *object,
}
/* Text encoding/decoding API */
-static
-PyObject *codec_getitem_checked(const char *encoding,
- const char *operation_name,
- int index)
+PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
+ const char *alternate_command)
{
_Py_IDENTIFIER(_is_text_encoding);
PyObject *codec;
PyObject *attr;
- PyObject *v;
int is_text_codec;
codec = _PyCodec_Lookup(encoding);
@@ -502,27 +527,44 @@ PyObject *codec_getitem_checked(const char *encoding,
Py_DECREF(codec);
PyErr_Format(PyExc_LookupError,
"'%.400s' is not a text encoding; "
- "use codecs.%s() to handle arbitrary codecs",
- encoding, operation_name);
+ "use %s to handle arbitrary codecs",
+ encoding, alternate_command);
return NULL;
}
}
}
+ /* This appears to be a valid text encoding */
+ return codec;
+}
+
+
+static
+PyObject *codec_getitem_checked(const char *encoding,
+ const char *alternate_command,
+ int index)
+{
+ PyObject *codec;
+ PyObject *v;
+
+ codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
+ if (codec == NULL)
+ return NULL;
+
v = PyTuple_GET_ITEM(codec, index);
- Py_DECREF(codec);
Py_INCREF(v);
+ Py_DECREF(codec);
return v;
}
static PyObject * _PyCodec_TextEncoder(const char *encoding)
{
- return codec_getitem_checked(encoding, "encode", 0);
+ return codec_getitem_checked(encoding, "codecs.encode()", 0);
}
static PyObject * _PyCodec_TextDecoder(const char *encoding)
{
- return codec_getitem_checked(encoding, "decode", 1);
+ return codec_getitem_checked(encoding, "codecs.decode()", 1);
}
PyObject *_PyCodec_EncodeText(PyObject *object,