summaryrefslogtreecommitdiffstats
path: root/Python/codecs.c
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2015-05-31 17:21:00 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2015-05-31 17:21:00 (GMT)
commitc7797dc7482035ee166ca2e941b623382b92e1fc (patch)
tree526e26fa4dac506f02859fdbe946d33ed4165f5e /Python/codecs.c
parentcfb7028df4bdf12325786e48ebef3b4982efa119 (diff)
downloadcpython-c7797dc7482035ee166ca2e941b623382b92e1fc.zip
cpython-c7797dc7482035ee166ca2e941b623382b92e1fc.tar.gz
cpython-c7797dc7482035ee166ca2e941b623382b92e1fc.tar.bz2
Issue #19543: Emit deprecation warning for known non-text encodings.
Backported issues #19619: encode() and decode() methods and constructors of str, unicode and bytearray classes now emit deprecation warning for known non-text encodings when Python is ran with the -3 option. Backported issues #20404: io.TextIOWrapper (and hence io.open()) now uses the internal codec marking system added to emit deprecation warning for known non-text encodings at stream construction time when Python is ran with the -3 option.
Diffstat (limited to 'Python/codecs.c')
-rw-r--r--Python/codecs.c209
1 files changed, 182 insertions, 27 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index 184d147..d672362 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -217,20 +217,15 @@ PyObject *codec_getitem(const char *encoding, int index)
return v;
}
-/* Helper function to create an incremental codec. */
-
+/* Helper functions to create an incremental codec. */
static
-PyObject *codec_getincrementalcodec(const char *encoding,
- const char *errors,
- const char *attrname)
+PyObject *codec_makeincrementalcodec(PyObject *codec_info,
+ const char *errors,
+ const char *attrname)
{
- PyObject *codecs, *ret, *inccodec;
+ PyObject *ret, *inccodec;
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- return NULL;
- inccodec = PyObject_GetAttrString(codecs, attrname);
- Py_DECREF(codecs);
+ inccodec = PyObject_GetAttrString(codec_info, attrname);
if (inccodec == NULL)
return NULL;
if (errors)
@@ -241,6 +236,21 @@ PyObject *codec_getincrementalcodec(const char *encoding,
return ret;
}
+static
+PyObject *codec_getincrementalcodec(const char *encoding,
+ const char *errors,
+ const char *attrname)
+{
+ PyObject *codec_info, *ret;
+
+ codec_info = _PyCodec_Lookup(encoding);
+ if (codec_info == NULL)
+ return NULL;
+ ret = codec_makeincrementalcodec(codec_info, errors, attrname);
+ Py_DECREF(codec_info);
+ return ret;
+}
+
/* Helper function to create a stream codec. */
static
@@ -264,6 +274,24 @@ PyObject *codec_getstreamcodec(const char *encoding,
return streamcodec;
}
+/* Helpers to work with the result of _PyCodec_Lookup
+
+ */
+PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
+ const char *errors)
+{
+ return codec_makeincrementalcodec(codec_info, errors,
+ "incrementaldecoder");
+}
+
+PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
+ const char *errors)
+{
+ return codec_makeincrementalcodec(codec_info, errors,
+ "incrementalencoder");
+}
+
+
/* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount.
@@ -311,18 +339,15 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
errors is passed to the encoder factory as argument if non-NULL. */
-PyObject *PyCodec_Encode(PyObject *object,
- const char *encoding,
- const char *errors)
+static PyObject *
+_PyCodec_EncodeInternal(PyObject *object,
+ PyObject *encoder,
+ const char *encoding,
+ const char *errors)
{
- PyObject *encoder = NULL;
PyObject *args = NULL, *result = NULL;
PyObject *v;
- encoder = PyCodec_Encoder(encoding);
- if (encoder == NULL)
- goto onError;
-
args = args_tuple(object, errors);
if (args == NULL)
goto onError;
@@ -358,18 +383,15 @@ PyObject *PyCodec_Encode(PyObject *object,
errors is passed to the decoder factory as argument if non-NULL. */
-PyObject *PyCodec_Decode(PyObject *object,
- const char *encoding,
- const char *errors)
+static PyObject *
+_PyCodec_DecodeInternal(PyObject *object,
+ PyObject *decoder,
+ const char *encoding,
+ const char *errors)
{
- PyObject *decoder = NULL;
PyObject *args = NULL, *result = NULL;
PyObject *v;
- decoder = PyCodec_Decoder(encoding);
- if (decoder == NULL)
- goto onError;
-
args = args_tuple(object, errors);
if (args == NULL)
goto onError;
@@ -399,6 +421,139 @@ PyObject *PyCodec_Decode(PyObject *object,
return NULL;
}
+/* Generic encoding/decoding API */
+PyObject *PyCodec_Encode(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *encoder;
+
+ encoder = PyCodec_Encoder(encoding);
+ if (encoder == NULL)
+ return NULL;
+
+ return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
+}
+
+PyObject *PyCodec_Decode(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *decoder;
+
+ decoder = PyCodec_Decoder(encoding);
+ if (decoder == NULL)
+ return NULL;
+
+ return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
+}
+
+/* Text encoding/decoding API */
+PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
+ const char *alternate_command)
+{
+ PyObject *codec;
+ PyObject *attr;
+ int is_text_codec;
+
+ codec = _PyCodec_Lookup(encoding);
+ if (codec == NULL)
+ return NULL;
+
+ /* Backwards compatibility: assume any raw tuple describes a text
+ * encoding, and the same for anything lacking the private
+ * attribute.
+ */
+ if (Py_Py3kWarningFlag && !PyTuple_CheckExact(codec)) {
+ attr = PyObject_GetAttrString(codec, "_is_text_encoding");
+ if (attr == NULL) {
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+ goto onError;
+ PyErr_Clear();
+ } else {
+ is_text_codec = PyObject_IsTrue(attr);
+ Py_DECREF(attr);
+ if (is_text_codec < 0)
+ goto onError;
+ if (!is_text_codec) {
+ PyObject *msg = PyString_FromFormat(
+ "'%.400s' is not a text encoding; "
+ "use %s to handle arbitrary codecs",
+ encoding, alternate_command);
+ if (msg == NULL)
+ goto onError;
+ if (PyErr_WarnPy3k(PyString_AS_STRING(msg), 1) < 0) {
+ Py_DECREF(msg);
+ goto onError;
+ }
+ Py_DECREF(msg);
+ }
+ }
+ }
+
+ /* This appears to be a valid text encoding */
+ return codec;
+
+ onError:
+ Py_DECREF(codec);
+ return NULL;
+}
+
+
+static
+PyObject *codec_getitem_checked(const char *encoding,
+ const char *alternate_command,
+ int index)
+{
+ PyObject *codec;
+ PyObject *v;
+
+ codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
+ if (codec == NULL)
+ return NULL;
+
+ v = PyTuple_GET_ITEM(codec, index);
+ Py_INCREF(v);
+ Py_DECREF(codec);
+ return v;
+}
+
+static PyObject * _PyCodec_TextEncoder(const char *encoding)
+{
+ return codec_getitem_checked(encoding, "codecs.encode()", 0);
+}
+
+static PyObject * _PyCodec_TextDecoder(const char *encoding)
+{
+ return codec_getitem_checked(encoding, "codecs.decode()", 1);
+}
+
+PyObject *_PyCodec_EncodeText(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *encoder;
+
+ encoder = _PyCodec_TextEncoder(encoding);
+ if (encoder == NULL)
+ return NULL;
+
+ return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
+}
+
+PyObject *_PyCodec_DecodeText(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *decoder;
+
+ decoder = _PyCodec_TextDecoder(encoding);
+ if (decoder == NULL)
+ return NULL;
+
+ return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
+}
+
/* Register the error handling callback function error under the name
name. This function will be called by the codec when it encounters
an unencodable characters/undecodable bytes and doesn't know the