summaryrefslogtreecommitdiffstats
path: root/Modules/_codecsmodule.c
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2007-05-18 17:15:44 (GMT)
committerWalter Dörwald <walter@livinglogic.de>2007-05-18 17:15:44 (GMT)
commit1ab833082738ced53318aca05901e596d5ede683 (patch)
tree0ff2b4c1fcbab3233e012f04bce801cadfd6d7f9 /Modules/_codecsmodule.c
parent14176a56d3fe36388115688d0b5acae0c759c044 (diff)
downloadcpython-1ab833082738ced53318aca05901e596d5ede683.zip
cpython-1ab833082738ced53318aca05901e596d5ede683.tar.gz
cpython-1ab833082738ced53318aca05901e596d5ede683.tar.bz2
Add functions PyUnicode_Append() and PyUnicode_AppendAndDel() that mirror
PyString_Concat() and PyString_ConcatAndDel() (the name PyUnicode_Concat() was already taken). Change PyObject_Repr() to always return a unicode object. Update all repr implementations to return unicode objects. Add a function PyObject_ReprStr8() that calls PyObject_Repr() and converts the result to an 8bit string. Use PyObject_ReprStr8() where using PyObject_Repr() can't be done straightforward.
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r--Modules/_codecsmodule.c58
1 files changed, 47 insertions, 11 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index b165f97..cd766c3 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -161,27 +161,63 @@ static PyObject *
escape_encode(PyObject *self,
PyObject *args)
{
+ static const char *hexdigits = "0123456789abcdef";
PyObject *str;
+ Py_ssize_t size;
+ Py_ssize_t newsize;
const char *errors = NULL;
- char *buf;
- Py_ssize_t len;
+ PyObject *v;
if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
&PyString_Type, &str, &errors))
return NULL;
- str = PyString_Repr(str, 0);
- if (!str)
- return NULL;
+ size = PyUnicode_GET_SIZE(str);
+ newsize = 4*size;
+ if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
+ PyErr_SetString(PyExc_OverflowError,
+ "string is too large to encode");
+ return NULL;
+ }
+ v = PyBytes_FromStringAndSize(NULL, newsize);
- /* The string will be quoted. Unquote, similar to unicode-escape. */
- buf = PyString_AS_STRING (str);
- len = PyString_GET_SIZE (str);
- memmove(buf, buf+1, len-2);
- if (_PyString_Resize(&str, len-2) < 0)
+ if (v == NULL) {
return NULL;
+ }
+ else {
+ register Py_ssize_t i;
+ register char c;
+ register char *p = PyBytes_AS_STRING(v);
+
+ for (i = 0; i < size; i++) {
+ /* There's at least enough room for a hex escape */
+ assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
+ c = PyString_AS_STRING(str)[i];
+ if (c == '\'' || c == '\\')
+ *p++ = '\\', *p++ = c;
+ else if (c == '\t')
+ *p++ = '\\', *p++ = 't';
+ else if (c == '\n')
+ *p++ = '\\', *p++ = 'n';
+ else if (c == '\r')
+ *p++ = '\\', *p++ = 'r';
+ else if (c < ' ' || c >= 0x7f) {
+ *p++ = '\\';
+ *p++ = 'x';
+ *p++ = hexdigits[(c & 0xf0) >> 4];
+ *p++ = hexdigits[c & 0xf];
+ }
+ else
+ *p++ = c;
+ }
+ *p = '\0';
+ if (PyBytes_Resize(v, (p - PyBytes_AS_STRING(v)))) {
+ Py_DECREF(v);
+ return NULL;
+ }
+ }
- return codec_tuple(str, PyString_Size(str));
+ return codec_tuple(v, PyBytes_Size(v));
}
/* --- Decoder ------------------------------------------------------------ */