summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-11-22 00:45:37 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2011-11-22 00:45:37 (GMT)
commitab1d16b4561390022efc0a45fae1ce6cbb119a58 (patch)
treee7307b10973f0167102e0baf7d0363815af0fe25
parent58fcf9f801d590b999401533154b63f4eb26ce6c (diff)
downloadcpython-ab1d16b4561390022efc0a45fae1ce6cbb119a58.zip
cpython-ab1d16b4561390022efc0a45fae1ce6cbb119a58.tar.gz
cpython-ab1d16b4561390022efc0a45fae1ce6cbb119a58.tar.bz2
Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()
* Add tests for PyUnicode_EncodeDecimal() and PyUnicode_TransformDecimalToASCII()
* Remove the unused "e" variable in replace()
-rw-r--r--Lib/test/test_unicode.py36
-rw-r--r--Modules/_testcapimodule.c51
-rw-r--r--Objects/unicodeobject.c10
3 files changed, 89 insertions, 8 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 591a297..259a181 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1767,6 +1767,42 @@ class StringModuleTest(unittest.TestCase):
]])
self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
+ def test_encode_decimal(self):
+ from _testcapi import unicode_encodedecimal
+ self.assertEqual(unicode_encodedecimal('123'),
+ b'123')
+ self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
+ b'3.14')
+ self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+ b' 3.14 ')
+ self.assertRaises(UnicodeEncodeError,
+ unicode_encodedecimal, "123\u20ac", "strict")
+ self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
+ b'123?')
+ self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
+ b'123')
+ self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
+ b'123&#8364;')
+ self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
+ b'123\\u20ac')
+ self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
+ b'123? ')
+ self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
+ b'123??')
+ self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
+ b'123?0')
+
+ def test_transform_decimal(self):
+ from _testcapi import unicode_transformdecimaltoascii as transform_decimal
+ self.assertEqual(transform_decimal('123'),
+ '123')
+ self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
+ '3.14')
+ self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+ "\N{EM SPACE}3.14\N{EN SPACE}")
+ self.assertEqual(transform_decimal('123\u20ac'),
+ '123\u20ac')
+
def test_main():
support.run_unittest(__name__)
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index f19d0df..6c61f7d 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1438,6 +1438,51 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
}
+/* Test wrapper for PyUnicode_EncodeDecimal():
+   unicode_encodedecimal(str[, errors]) -> bytes.
+   Returns the NUL-terminated output of the encoder as a bytes object, or
+   NULL with an exception set on failure. */
static PyObject *
+unicode_encodedecimal(PyObject *self, PyObject *args)
+{
+    /* Worst-case number of bytes written per input code unit by the
+       built-in error handlers:
+       len('&#1114111;') == len('\\U0010ffff') == 10.
+       NOTE(review): a custom registered error handler could presumably
+       return a longer replacement; this helper does not guard against it. */
+    const Py_ssize_t max_bytes_per_unit = 10;
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    char *errors = NULL;
+    PyObject *decimal;
+    Py_ssize_t decimal_length, new_length;
+    int res;
+
+    if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors))
+        return NULL;
+
+    /* PyUnicode_EncodeDecimal() is not told the output size, so the buffer
+       has to be big enough up front.  Reject lengths whose product would
+       overflow Py_ssize_t. */
+    if (length > PY_SSIZE_T_MAX / max_bytes_per_unit)
+        return PyErr_NoMemory();
+    decimal_length = length * max_bytes_per_unit;
+    decimal = PyBytes_FromStringAndSize(NULL, decimal_length);
+    if (decimal == NULL)
+        return NULL;
+
+    res = PyUnicode_EncodeDecimal(unicode, length,
+                                  PyBytes_AS_STRING(decimal),
+                                  errors);
+    if (res < 0) {
+        Py_DECREF(decimal);
+        return NULL;
+    }
+
+    /* The encoder's output is read back as a C string to find how many
+       bytes were actually written, then the bytes object is shrunk. */
+    new_length = (Py_ssize_t)strlen(PyBytes_AS_STRING(decimal));
+    assert(new_length <= decimal_length);
+    /* On failure _PyBytes_Resize() releases the object and sets decimal
+       to NULL, so no extra Py_DECREF is needed here. */
+    res = _PyBytes_Resize(&decimal, new_length);
+    if (res < 0)
+        return NULL;
+
+    return decimal;
+}
+
+/* Test wrapper for PyUnicode_TransformDecimalToASCII():
+   unicode_transformdecimaltoascii(str[, ignored]) returns the result of
+   PyUnicode_TransformDecimalToASCII() for the given buffer, or NULL if
+   argument parsing fails. */
+static PyObject *
+unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    /* The "|s" unit in the format needs a destination pointer even though
+       the value is never used.  Without one, PyArg_ParseTuple() would fetch
+       a vararg that was never passed whenever a second argument is given. */
+    char *ignored = NULL;
+
+    if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &ignored))
+        return NULL;
+    return PyUnicode_TransformDecimalToASCII(unicode, length);
+}
+
+static PyObject *
getargs_w_star(PyObject *self, PyObject *args)
{
Py_buffer buffer;
@@ -2320,8 +2365,10 @@ static PyMethodDef TestMethods[] = {
{"test_u_code", (PyCFunction)test_u_code, METH_NOARGS},
{"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS},
{"test_widechar", (PyCFunction)test_widechar, METH_NOARGS},
- {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
- {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
+ {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
+ {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
+ {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
+ {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
#ifdef WITH_THREAD
{"_test_thread_state", test_thread_state, METH_VARARGS},
{"_pending_threadfunc", pending_threadfunc, METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8680726..d13c547 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6323,11 +6323,10 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
}
/* All other characters are considered unencodable */
collstart = p;
- collend = p+1;
- while (collend < end) {
+ for (collend = p+1; collend < end; collend++) {
if ((0 < *collend && *collend < 256) ||
- !Py_UNICODE_ISSPACE(*collend) ||
- Py_UNICODE_TODECIMAL(*collend))
+ Py_UNICODE_ISSPACE(*collend) ||
+ 0 <= Py_UNICODE_TODECIMAL(*collend))
break;
}
/* cache callback name lookup
@@ -7004,7 +7003,7 @@ PyObject *replace(PyUnicodeObject *self,
}
} else {
- Py_ssize_t n, i, j, e;
+ Py_ssize_t n, i, j;
Py_ssize_t product, new_size, delta;
Py_UNICODE *p;
@@ -7036,7 +7035,6 @@ PyObject *replace(PyUnicodeObject *self,
return NULL;
i = 0;
p = u->str;
- e = self->length - str1->length;
if (str1->length > 0) {
while (n-- > 0) {
/* look for next match */