diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-25 19:09:01 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-25 19:09:01 (GMT) |
commit | 6345be9a141642c2e95eec417844f8702775b700 (patch) | |
tree | 5cfa88e01498c964c857ca184f7010924e69de25 | |
parent | e7ede067576e9beaf0787e1fb3104cf6202d8aa0 (diff) | |
download | cpython-6345be9a141642c2e95eec417844f8702775b700.zip cpython-6345be9a141642c2e95eec417844f8702775b700.tar.gz cpython-6345be9a141642c2e95eec417844f8702775b700.tar.bz2 |
Close #13093: PyUnicode_EncodeDecimal() no longer supports error handlers
other than "strict". The caller was unable to compute the size of the
output buffer, because that size depends on the error handler.
-rw-r--r-- | Lib/test/test_unicode.py | 18 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 131 |
3 files changed, 26 insertions, 127 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index b20f878..72aae8f 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1816,20 +1816,10 @@ class UnicodeTest(string_tests.CommonTest, b' 3.14 ') self.assertRaises(UnicodeEncodeError, unicode_encodedecimal, "123\u20ac", "strict") - self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"), - b'123?') - self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"), - b'123') - self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"), - b'123&#8364;') - self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"), - b'123\\u20ac') - self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"), - b'123? ') - self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"), - b'123??') - self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"), - b'123?0') + self.assertRaisesRegex( + ValueError, + "^'decimal' codec can't encode character", + unicode_encodedecimal, "123\u20ac", "replace") def test_transform_decimal(self): from _testcapi import unicode_transformdecimaltoascii as transform_decimal @@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1? Core and Builtins ----------------- +- Issue #13093: PyUnicode_EncodeDecimal() doesn't support error handlers + different than "strict" anymore. The caller was unable to compute the + size of the output buffer: it depends on the error handler. + - PEP 3155 / issue #13448: Qualified name for classes and functions. 
- Issue #13436: Fix a bogus error message when an AST object was passed diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2fefdbe..a9bf677 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8839,15 +8839,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, char *output, const char *errors) { - PyObject *errorHandler = NULL; - PyObject *exc = NULL; PyObject *unicode; - const char *encoding = "decimal"; - const char *reason = "invalid decimal Unicode string"; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; - Py_ssize_t i, j; + Py_ssize_t i; enum PyUnicode_Kind kind; void *data; @@ -8860,15 +8853,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, if (unicode == NULL) return -1; - if (PyUnicode_READY(unicode) < 0) - goto onError; + if (PyUnicode_READY(unicode) < 0) { + Py_DECREF(unicode); + return -1; + } kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); for (i=0; i < length; ) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); + PyObject *exc; + Py_UCS4 ch; int decimal; - Py_ssize_t startpos, endpos; + Py_ssize_t startpos; + + ch = PyUnicode_READ(kind, data, i); if (Py_UNICODE_ISSPACE(ch)) { *output++ = ' '; @@ -8886,113 +8884,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, i++; continue; } - /* All other characters are considered unencodable */ - startpos = i; - endpos = i+1; - for (; endpos < length; endpos++) { - ch = PyUnicode_READ(kind, data, endpos); - if ((0 < ch && ch < 256) || - Py_UNICODE_ISSPACE(ch) || - 0 <= Py_UNICODE_TODECIMAL(ch)) - break; - } - /* cache callback name lookup - * (if not done yet, i.e. 
it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ - raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason); - goto onError; - case 2: /* replace */ - for (j=startpos; j < endpos; j++) - *output++ = '?'; - i = endpos; - break; - case 3: /* ignore */ - i = endpos; - break; - case 4: /* xmlcharrefreplace */ - /* generate replacement */ - for (j=startpos; j < endpos; j++) { - ch = PyUnicode_READ(kind, data, i); - output += sprintf(output, "&#%d;", (int)ch); - i++; - } - break; - default: - { - PyObject *repunicode; - Py_ssize_t repsize, newpos, k; - enum PyUnicode_Kind repkind; - void *repdata; - - repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, - encoding, reason, unicode, &exc, - startpos, endpos, &newpos); - if (repunicode == NULL) - goto onError; - if (!PyUnicode_Check(repunicode)) { - /* Byte results not supported, since they have no decimal property. 
*/ - PyErr_SetString(PyExc_TypeError, "error handler should return unicode"); - Py_DECREF(repunicode); - goto onError; - } - if (PyUnicode_READY(repunicode) < 0) { - Py_DECREF(repunicode); - goto onError; - } - repkind = PyUnicode_KIND(repunicode); - repdata = PyUnicode_DATA(repunicode); - /* generate replacement */ - repsize = PyUnicode_GET_SIZE(repunicode); - for (k=0; k<repsize; k++) { - ch = PyUnicode_READ(repkind, repdata, k); - if (Py_UNICODE_ISSPACE(ch)) - *output++ = ' '; - else { - decimal = Py_UNICODE_TODECIMAL(ch); - if (decimal >= 0) - *output++ = '0' + decimal; - else if (0 < ch && ch < 256) - *output++ = (char)ch; - else { - Py_DECREF(repunicode); - raise_encode_exception(&exc, encoding, - unicode, startpos, endpos, - reason); - goto onError; - } - } - } - i = newpos; - Py_DECREF(repunicode); - } - } + startpos = i; + exc = NULL; + raise_encode_exception(&exc, "decimal", unicode, + startpos, startpos+1, + "invalid decimal Unicode string"); + Py_XDECREF(exc); + Py_DECREF(unicode); + return -1; } /* 0-terminate the output string */ *output++ = '\0'; - Py_XDECREF(exc); - Py_XDECREF(errorHandler); Py_DECREF(unicode); return 0; - - onError: - Py_XDECREF(exc); - Py_XDECREF(errorHandler); - Py_DECREF(unicode); - return -1; } /* --- Helpers ------------------------------------------------------------ */ |