diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-05-18 17:15:44 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-05-18 17:15:44 (GMT) |
commit | 1ab833082738ced53318aca05901e596d5ede683 (patch) | |
tree | 0ff2b4c1fcbab3233e012f04bce801cadfd6d7f9 /Modules/_codecsmodule.c | |
parent | 14176a56d3fe36388115688d0b5acae0c759c044 (diff) | |
download | cpython-1ab833082738ced53318aca05901e596d5ede683.zip cpython-1ab833082738ced53318aca05901e596d5ede683.tar.gz cpython-1ab833082738ced53318aca05901e596d5ede683.tar.bz2 |
Add functions PyUnicode_Append() and PyUnicode_AppendAndDel() that mirror
PyString_Concat() and PyString_ConcatAndDel() (the name PyUnicode_Concat()
was already taken).
Change PyObject_Repr() to always return a unicode object.
Update all repr implementations to return unicode objects.
Add a function PyObject_ReprStr8() that calls PyObject_Repr() and converts
the result to an 8-bit string.
Use PyObject_ReprStr8() where using PyObject_Repr() can't be done
straightforwardly.
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r-- | Modules/_codecsmodule.c | 58 |
1 files changed, 47 insertions, 11 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index b165f97..cd766c3 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -161,27 +161,63 @@ static PyObject * escape_encode(PyObject *self, PyObject *args) { + static const char *hexdigits = "0123456789abcdef"; PyObject *str; + Py_ssize_t size; + Py_ssize_t newsize; const char *errors = NULL; - char *buf; - Py_ssize_t len; + PyObject *v; if (!PyArg_ParseTuple(args, "O!|z:escape_encode", &PyString_Type, &str, &errors)) return NULL; - str = PyString_Repr(str, 0); - if (!str) - return NULL; + size = PyUnicode_GET_SIZE(str); + newsize = 4*size; + if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) { + PyErr_SetString(PyExc_OverflowError, + "string is too large to encode"); + return NULL; + } + v = PyBytes_FromStringAndSize(NULL, newsize); - /* The string will be quoted. Unquote, similar to unicode-escape. */ - buf = PyString_AS_STRING (str); - len = PyString_GET_SIZE (str); - memmove(buf, buf+1, len-2); - if (_PyString_Resize(&str, len-2) < 0) + if (v == NULL) { return NULL; + } + else { + register Py_ssize_t i; + register char c; + register char *p = PyBytes_AS_STRING(v); + + for (i = 0; i < size; i++) { + /* There's at least enough room for a hex escape */ + assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); + c = PyString_AS_STRING(str)[i]; + if (c == '\'' || c == '\\') + *p++ = '\\', *p++ = c; + else if (c == '\t') + *p++ = '\\', *p++ = 't'; + else if (c == '\n') + *p++ = '\\', *p++ = 'n'; + else if (c == '\r') + *p++ = '\\', *p++ = 'r'; + else if (c < ' ' || c >= 0x7f) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = hexdigits[(c & 0xf0) >> 4]; + *p++ = hexdigits[c & 0xf]; + } + else + *p++ = c; + } + *p = '\0'; + if (PyBytes_Resize(v, (p - PyBytes_AS_STRING(v)))) { + Py_DECREF(v); + return NULL; + } + } - return codec_tuple(str, PyString_Size(str)); + return codec_tuple(v, PyBytes_Size(v)); } /* --- Decoder ------------------------------------------------------------ */ |