From ab1d16b4561390022efc0a45fae1ce6cbb119a58 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2011 01:45:37 +0100 Subject: Issue #13093: Fix error handling on PyUnicode_EncodeDecimal() * Add tests for PyUnicode_EncodeDecimal() and PyUnicode_TransformDecimalToASCII() * Remove the unused "e" variable in replace() --- Lib/test/test_unicode.py | 36 +++++++++++++++++++++++++++++++++ Modules/_testcapimodule.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-- Objects/unicodeobject.c | 10 ++++------ 3 files changed, 89 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 591a297..259a181 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1767,6 +1767,42 @@ class StringModuleTest(unittest.TestCase): ]]) self.assertRaises(TypeError, _string.formatter_field_name_split, 1) + def test_encode_decimal(self): + from _testcapi import unicode_encodedecimal + self.assertEqual(unicode_encodedecimal('123'), + b'123') + self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'), + b'3.14') + self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"), + b' 3.14 ') + self.assertRaises(UnicodeEncodeError, + unicode_encodedecimal, "123\u20ac", "strict") + self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"), + b'123?') + self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"), + b'123') + self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"), + b'123&#8364;') + self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"), + b'123\\u20ac') + self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"), + b'123? 
') + self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"), + b'123??') + self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"), + b'123?0') + + def test_transform_decimal(self): + from _testcapi import unicode_transformdecimaltoascii as transform_decimal + self.assertEqual(transform_decimal('123'), + '123') + self.assertEqual(transform_decimal('\u0663.\u0661\u0664'), + '3.14') + self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"), + "\N{EM SPACE}3.14\N{EN SPACE}") + self.assertEqual(transform_decimal('123\u20ac'), + '123\u20ac') + def test_main(): support.run_unittest(__name__) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index f19d0df..6c61f7d 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1438,6 +1438,51 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) } static PyObject * +unicode_encodedecimal(PyObject *self, PyObject *args) +{ + Py_UNICODE *unicode; + Py_ssize_t length; + char *errors = NULL; + PyObject *decimal; + Py_ssize_t decimal_length, new_length; + int res; + + if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors)) + return NULL; + + decimal_length = length * 7; /* len('&#8364;') */ + decimal = PyBytes_FromStringAndSize(NULL, decimal_length); + if (decimal == NULL) + return NULL; + + res = PyUnicode_EncodeDecimal(unicode, length, + PyBytes_AS_STRING(decimal), + errors); + if (res < 0) { + Py_DECREF(decimal); + return NULL; + } + + new_length = strlen(PyBytes_AS_STRING(decimal)); + assert(new_length <= decimal_length); + res = _PyBytes_Resize(&decimal, new_length); + if (res < 0) + return NULL; + + return decimal; +} + +static PyObject * +unicode_transformdecimaltoascii(PyObject *self, PyObject *args) +{ + Py_UNICODE *unicode; + Py_ssize_t length; + if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length)) + return NULL; + return PyUnicode_TransformDecimalToASCII(unicode, length); +} + +static PyObject * getargs_w_star(PyObject *self, PyObject 
*args) { Py_buffer buffer; @@ -2320,8 +2365,10 @@ static PyMethodDef TestMethods[] = { {"test_u_code", (PyCFunction)test_u_code, METH_NOARGS}, {"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS}, {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, - {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, - {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, + {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, + {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, + {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, + {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, #ifdef WITH_THREAD {"_test_thread_state", test_thread_state, METH_VARARGS}, {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8680726..d13c547 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6323,11 +6323,10 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, } /* All other characters are considered unencodable */ collstart = p; - collend = p+1; - while (collend < end) { + for (collend = p+1; collend < end; collend++) { if ((0 < *collend && *collend < 256) || - !Py_UNICODE_ISSPACE(*collend) || - Py_UNICODE_TODECIMAL(*collend)) + Py_UNICODE_ISSPACE(*collend) || + 0 <= Py_UNICODE_TODECIMAL(*collend)) break; } /* cache callback name lookup @@ -7004,7 +7003,7 @@ PyObject *replace(PyUnicodeObject *self, } } else { - Py_ssize_t n, i, j, e; + Py_ssize_t n, i, j; Py_ssize_t product, new_size, delta; Py_UNICODE *p; @@ -7036,7 +7035,6 @@ PyObject *replace(PyUnicodeObject *self, return NULL; i = 0; p = u->str; - e = self->length - str1->length; if (str1->length > 0) { while (n-- > 0) { /* look for next match */ -- cgit v0.12