diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-22 00:54:19 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-22 00:54:19 (GMT) |
commit | 975134e2a233db5d7a28de63d0e4047894379490 (patch) | |
tree | b2f628f78fec92997a89c7319ce994fb1588a08b | |
parent | 5aa7df320f35cb0d5acb81ff313b1cc02480c9b8 (diff) | |
download | cpython-975134e2a233db5d7a28de63d0e4047894379490.zip cpython-975134e2a233db5d7a28de63d0e4047894379490.tar.gz cpython-975134e2a233db5d7a28de63d0e4047894379490.tar.bz2 |
Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()
Add tests for PyUnicode_EncodeDecimal()
-rw-r--r-- | Lib/test/test_unicode.py | 25 | ||||
-rw-r--r-- | Modules/_testcapimodule.c | 36 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 7 |
3 files changed, 64 insertions, 4 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 5c1858c..fda44da 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1606,6 +1606,31 @@ class UnicodeTest( self.assertEqual("%s" % u, u'__unicode__ overridden') self.assertEqual("{}".format(u), '__unicode__ overridden') + def test_encode_decimal(self): + from _testcapi import unicode_encodedecimal + self.assertEqual(unicode_encodedecimal(u'123'), + b'123') + self.assertEqual(unicode_encodedecimal(u'\u0663.\u0661\u0664'), + b'3.14') + self.assertEqual(unicode_encodedecimal(u"\N{EM SPACE}3.14\N{EN SPACE}"), + b' 3.14 ') + self.assertRaises(UnicodeEncodeError, + unicode_encodedecimal, u"123\u20ac", "strict") + self.assertEqual(unicode_encodedecimal(u"123\u20ac", "replace"), + b'123?') + self.assertEqual(unicode_encodedecimal(u"123\u20ac", "ignore"), + b'123') + self.assertEqual(unicode_encodedecimal(u"123\u20ac", "xmlcharrefreplace"), + b'123&#8364;') + self.assertEqual(unicode_encodedecimal(u"123\u20ac", "backslashreplace"), + b'123\\u20ac') + self.assertEqual(unicode_encodedecimal(u"123\u20ac\N{EM SPACE}", "replace"), + b'123? 
') + self.assertEqual(unicode_encodedecimal(u"123\u20ac\u20ac", "replace"), + b'123??') + self.assertEqual(unicode_encodedecimal(u"123\u20ac\u0660", "replace"), + b'123?0') + def test_main(): test_support.run_unittest(__name__) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 9c45274..bb1cc83 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1106,6 +1106,41 @@ test_widechar(PyObject *self) } static PyObject * +unicode_encodedecimal(PyObject *self, PyObject *args) +{ + Py_UNICODE *unicode; + Py_ssize_t length; + char *errors = NULL; + PyObject *decimal; + Py_ssize_t decimal_length, new_length; + int res; + + if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors)) + return NULL; + + decimal_length = length * 7; /* len('&#8364;') */ + decimal = PyBytes_FromStringAndSize(NULL, decimal_length); + if (decimal == NULL) + return NULL; + + res = PyUnicode_EncodeDecimal(unicode, length, + PyBytes_AS_STRING(decimal), + errors); + if (res < 0) { + Py_DECREF(decimal); + return NULL; + } + + new_length = strlen(PyBytes_AS_STRING(decimal)); + assert(new_length <= decimal_length); + res = _PyBytes_Resize(&decimal, new_length); + if (res < 0) + return NULL; + + return decimal; +} + +static PyObject * test_empty_argparse(PyObject *self) { /* Test that formats can begin with '|'. See issue #4720. 
*/ @@ -1698,6 +1733,7 @@ static PyMethodDef TestMethods[] = { #ifdef Py_USING_UNICODE {"test_u_code", (PyCFunction)test_u_code, METH_NOARGS}, {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, + {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, #endif #ifdef WITH_THREAD {"_test_thread_state", test_thread_state, METH_VARARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5ce879d..8225e82 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5160,11 +5160,10 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, } /* All other characters are considered unencodable */ collstart = p; - collend = p+1; - while (collend < end) { + for (collend = p+1; collend < end; collend++) { if ((0 < *collend && *collend < 256) || - !Py_UNICODE_ISSPACE(*collend) || - Py_UNICODE_TODECIMAL(*collend)) + Py_UNICODE_ISSPACE(*collend) || + 0 <= Py_UNICODE_TODECIMAL(*collend)) break; } /* cache callback name lookup |