summaryrefslogtreecommitdiffstats
path: root/Modules/_codecsmodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r--Modules/_codecsmodule.c161
1 files changed, 88 insertions, 73 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 11d7cd0..eb740f9 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -94,15 +94,8 @@ codec_encode(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
return NULL;
-#ifdef Py_USING_UNICODE
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
-#else
- if (encoding == NULL) {
- PyErr_SetString(PyExc_ValueError, "no encoding specified");
- return NULL;
- }
-#endif
/* Encode via the codec registry */
return PyCodec_Encode(v, encoding, errors);
@@ -128,15 +121,8 @@ codec_decode(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
return NULL;
-#ifdef Py_USING_UNICODE
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
-#else
- if (encoding == NULL) {
- PyErr_SetString(PyExc_ValueError, "no encoding specified");
- return NULL;
- }
-#endif
/* Decode via the codec registry */
return PyCodec_Decode(v, encoding, errors);
@@ -168,7 +154,7 @@ escape_decode(PyObject *self,
if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
&data, &size, &errors))
return NULL;
- return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
+ return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
size);
}
@@ -176,31 +162,64 @@ static PyObject *
escape_encode(PyObject *self,
PyObject *args)
{
+ static const char *hexdigits = "0123456789abcdef";
PyObject *str;
+ Py_ssize_t size;
+ Py_ssize_t newsize;
const char *errors = NULL;
- char *buf;
- Py_ssize_t consumed, len;
+ PyObject *v;
- if (!PyArg_ParseTuple(args, "S|z:escape_encode",
- &str, &errors))
+ if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
+ &PyBytes_Type, &str, &errors))
return NULL;
- consumed = PyString_GET_SIZE(str);
- str = PyString_Repr(str, 0);
- if (!str)
- return NULL;
+ size = PyBytes_GET_SIZE(str);
+ newsize = 4*size;
+ if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
+ PyErr_SetString(PyExc_OverflowError,
+ "string is too large to encode");
+ return NULL;
+ }
+ v = PyBytes_FromStringAndSize(NULL, newsize);
- /* The string will be quoted. Unquote, similar to unicode-escape. */
- buf = PyString_AS_STRING (str);
- len = PyString_GET_SIZE (str);
- memmove(buf, buf+1, len-2);
- if (_PyString_Resize(&str, len-2) < 0)
+ if (v == NULL) {
return NULL;
+ }
+ else {
+ register Py_ssize_t i;
+ register char c;
+ register char *p = PyBytes_AS_STRING(v);
+
+ for (i = 0; i < size; i++) {
+ /* There's at least enough room for a hex escape */
+ assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
+ c = PyBytes_AS_STRING(str)[i];
+ if (c == '\'' || c == '\\')
+ *p++ = '\\', *p++ = c;
+ else if (c == '\t')
+ *p++ = '\\', *p++ = 't';
+ else if (c == '\n')
+ *p++ = '\\', *p++ = 'n';
+ else if (c == '\r')
+ *p++ = '\\', *p++ = 'r';
+ else if (c < ' ' || c >= 0x7f) {
+ *p++ = '\\';
+ *p++ = 'x';
+ *p++ = hexdigits[(c & 0xf0) >> 4];
+ *p++ = hexdigits[c & 0xf];
+ }
+ else
+ *p++ = c;
+ }
+ *p = '\0';
+ if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
+ return NULL;
+ }
+ }
- return codec_tuple(str, consumed);
+ return codec_tuple(v, size);
}
-#ifdef Py_USING_UNICODE
/* --- Decoder ------------------------------------------------------------ */
static PyObject *
@@ -239,7 +258,7 @@ utf_7_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len;
@@ -262,7 +281,7 @@ utf_8_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len;
@@ -286,7 +305,7 @@ utf_16_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -309,7 +328,7 @@ utf_16_le_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
&pbuf, &errors, &final))
return NULL;
@@ -333,7 +352,7 @@ utf_16_be_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
&pbuf, &errors, &final))
return NULL;
@@ -365,7 +384,7 @@ utf_16_ex_decode(PyObject *self,
int final = 0;
Py_ssize_t consumed;
- if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
+ if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
&pbuf, &errors, &byteorder, &final))
return NULL;
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -390,7 +409,7 @@ utf_32_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -413,7 +432,7 @@ utf_32_le_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -436,7 +455,7 @@ utf_32_be_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded;
- if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -467,7 +486,7 @@ utf_32_ex_decode(PyObject *self,
int final = 0;
Py_ssize_t consumed;
- if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
+ if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
&pbuf, &errors, &byteorder, &final))
return NULL;
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -523,7 +542,7 @@ latin_1_decode(PyObject *self,
PyObject *unicode;
const char *errors = NULL;
- if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
+ if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
&pbuf, &errors))
return NULL;
@@ -540,7 +559,7 @@ ascii_decode(PyObject *self,
PyObject *unicode;
const char *errors = NULL;
- if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
+ if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
&pbuf, &errors))
return NULL;
@@ -558,7 +577,7 @@ charmap_decode(PyObject *self,
const char *errors = NULL;
PyObject *mapping = NULL;
- if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
+ if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
&pbuf, &errors, &mapping))
return NULL;
if (mapping == Py_None)
@@ -581,7 +600,7 @@ mbcs_decode(PyObject *self,
Py_ssize_t consumed;
PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
+ if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
&pbuf, &errors, &final))
return NULL;
consumed = pbuf.len;
@@ -602,32 +621,21 @@ static PyObject *
readbuffer_encode(PyObject *self,
PyObject *args)
{
+ Py_buffer pdata;
const char *data;
Py_ssize_t size;
const char *errors = NULL;
+ PyObject *result;
- if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
- &data, &size, &errors))
- return NULL;
-
- return codec_tuple(PyString_FromStringAndSize(data, size),
- size);
-}
-
-static PyObject *
-charbuffer_encode(PyObject *self,
- PyObject *args)
-{
- const char *data;
- Py_ssize_t size;
- const char *errors = NULL;
-
- if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
- &data, &size, &errors))
+ if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
+ &pdata, &errors))
return NULL;
+ data = pdata.buf;
+ size = pdata.len;
- return codec_tuple(PyString_FromStringAndSize(data, size),
- size);
+ result = PyBytes_FromStringAndSize(data, size);
+ PyBuffer_Release(&pdata);
+ return codec_tuple(result, size);
}
static PyObject *
@@ -646,14 +654,13 @@ unicode_internal_encode(PyObject *self,
if (PyUnicode_Check(obj)) {
data = PyUnicode_AS_DATA(obj);
size = PyUnicode_GET_DATA_SIZE(obj);
- return codec_tuple(PyString_FromStringAndSize(data, size),
+ return codec_tuple(PyBytes_FromStringAndSize(data, size),
PyUnicode_GET_SIZE(obj));
}
else {
if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
return NULL;
- return codec_tuple(PyString_FromStringAndSize(data, size),
- size);
+ return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
}
}
@@ -1008,7 +1015,6 @@ mbcs_encode(PyObject *self,
}
#endif /* MS_WINDOWS */
-#endif /* Py_USING_UNICODE */
/* --- Error handler registry --------------------------------------------- */
@@ -1063,7 +1069,6 @@ static PyMethodDef _codecs_functions[] = {
decode__doc__},
{"escape_encode", escape_encode, METH_VARARGS},
{"escape_decode", escape_decode, METH_VARARGS},
-#ifdef Py_USING_UNICODE
{"utf_8_encode", utf_8_encode, METH_VARARGS},
{"utf_8_decode", utf_8_decode, METH_VARARGS},
{"utf_7_encode", utf_7_encode, METH_VARARGS},
@@ -1096,12 +1101,10 @@ static PyMethodDef _codecs_functions[] = {
{"charmap_decode", charmap_decode, METH_VARARGS},
{"charmap_build", charmap_build, METH_VARARGS},
{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
- {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
{"mbcs_encode", mbcs_encode, METH_VARARGS},
{"mbcs_decode", mbcs_decode, METH_VARARGS},
#endif
-#endif /* Py_USING_UNICODE */
{"register_error", register_error, METH_VARARGS,
register_error__doc__},
{"lookup_error", lookup_error, METH_VARARGS,
@@ -1109,8 +1112,20 @@ static PyMethodDef _codecs_functions[] = {
{NULL, NULL} /* sentinel */
};
+static struct PyModuleDef codecsmodule = {
+ PyModuleDef_HEAD_INIT,
+ "_codecs",
+ NULL,
+ -1,
+ _codecs_functions,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
PyMODINIT_FUNC
-init_codecs(void)
+PyInit__codecs(void)
{
- Py_InitModule("_codecs", _codecs_functions);
+ return PyModule_Create(&codecsmodule);
}