summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-25 19:09:01 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-25 19:09:01 (GMT)
commit: 6345be9a141642c2e95eec417844f8702775b700 (patch)
tree: 5cfa88e01498c964c857ca184f7010924e69de25
parent: e7ede067576e9beaf0787e1fb3104cf6202d8aa0 (diff)
download: cpython-6345be9a141642c2e95eec417844f8702775b700.zip
cpython-6345be9a141642c2e95eec417844f8702775b700.tar.gz
cpython-6345be9a141642c2e95eec417844f8702775b700.tar.bz2
Close #13093: PyUnicode_EncodeDecimal() no longer supports error handlers
other than "strict". The caller was unable to compute the size of the output buffer, because that size depends on the error handler.
-rw-r--r-- Lib/test/test_unicode.py 18
-rw-r--r-- Misc/NEWS 4
-rw-r--r-- Objects/unicodeobject.c 131
3 files changed, 26 insertions, 127 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index b20f878..72aae8f 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1816,20 +1816,10 @@ class UnicodeTest(string_tests.CommonTest,
b' 3.14 ')
self.assertRaises(UnicodeEncodeError,
unicode_encodedecimal, "123\u20ac", "strict")
- self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
- b'123?')
- self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
- b'123')
- self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
- b'123&#8364;')
- self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
- b'123\\u20ac')
- self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
- b'123? ')
- self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
- b'123??')
- self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
- b'123?0')
+ self.assertRaisesRegex(
+ ValueError,
+ "^'decimal' codec can't encode character",
+ unicode_encodedecimal, "123\u20ac", "replace")
def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
diff --git a/Misc/NEWS b/Misc/NEWS
index b789b5f..d919e2e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
+- Issue #13093: PyUnicode_EncodeDecimal() doesn't support error handlers
+ different than "strict" anymore. The caller was unable to compute the
+ size of the output buffer: it depends on the error handler.
+
- PEP 3155 / issue #13448: Qualified name for classes and functions.
- Issue #13436: Fix a bogus error message when an AST object was passed
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2fefdbe..a9bf677 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8839,15 +8839,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
char *output,
const char *errors)
{
- PyObject *errorHandler = NULL;
- PyObject *exc = NULL;
PyObject *unicode;
- const char *encoding = "decimal";
- const char *reason = "invalid decimal Unicode string";
- /* the following variable is used for caching string comparisons
- * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
- int known_errorHandler = -1;
- Py_ssize_t i, j;
+ Py_ssize_t i;
enum PyUnicode_Kind kind;
void *data;
@@ -8860,15 +8853,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
if (unicode == NULL)
return -1;
- if (PyUnicode_READY(unicode) < 0)
- goto onError;
+ if (PyUnicode_READY(unicode) < 0) {
+ Py_DECREF(unicode);
+ return -1;
+ }
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
for (i=0; i < length; ) {
- Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ PyObject *exc;
+ Py_UCS4 ch;
int decimal;
- Py_ssize_t startpos, endpos;
+ Py_ssize_t startpos;
+
+ ch = PyUnicode_READ(kind, data, i);
if (Py_UNICODE_ISSPACE(ch)) {
*output++ = ' ';
@@ -8886,113 +8884,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
i++;
continue;
}
- /* All other characters are considered unencodable */
- startpos = i;
- endpos = i+1;
- for (; endpos < length; endpos++) {
- ch = PyUnicode_READ(kind, data, endpos);
- if ((0 < ch && ch < 256) ||
- Py_UNICODE_ISSPACE(ch) ||
- 0 <= Py_UNICODE_TODECIMAL(ch))
- break;
- }
- /* cache callback name lookup
- * (if not done yet, i.e. it's the first error) */
- if (known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- known_errorHandler = 4;
- else
- known_errorHandler = 0;
- }
- switch (known_errorHandler) {
- case 1: /* strict */
- raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason);
- goto onError;
- case 2: /* replace */
- for (j=startpos; j < endpos; j++)
- *output++ = '?';
- i = endpos;
- break;
- case 3: /* ignore */
- i = endpos;
- break;
- case 4: /* xmlcharrefreplace */
- /* generate replacement */
- for (j=startpos; j < endpos; j++) {
- ch = PyUnicode_READ(kind, data, i);
- output += sprintf(output, "&#%d;", (int)ch);
- i++;
- }
- break;
- default:
- {
- PyObject *repunicode;
- Py_ssize_t repsize, newpos, k;
- enum PyUnicode_Kind repkind;
- void *repdata;
-
- repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
- encoding, reason, unicode, &exc,
- startpos, endpos, &newpos);
- if (repunicode == NULL)
- goto onError;
- if (!PyUnicode_Check(repunicode)) {
- /* Byte results not supported, since they have no decimal property. */
- PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
- Py_DECREF(repunicode);
- goto onError;
- }
- if (PyUnicode_READY(repunicode) < 0) {
- Py_DECREF(repunicode);
- goto onError;
- }
- repkind = PyUnicode_KIND(repunicode);
- repdata = PyUnicode_DATA(repunicode);
- /* generate replacement */
- repsize = PyUnicode_GET_SIZE(repunicode);
- for (k=0; k<repsize; k++) {
- ch = PyUnicode_READ(repkind, repdata, k);
- if (Py_UNICODE_ISSPACE(ch))
- *output++ = ' ';
- else {
- decimal = Py_UNICODE_TODECIMAL(ch);
- if (decimal >= 0)
- *output++ = '0' + decimal;
- else if (0 < ch && ch < 256)
- *output++ = (char)ch;
- else {
- Py_DECREF(repunicode);
- raise_encode_exception(&exc, encoding,
- unicode, startpos, endpos,
- reason);
- goto onError;
- }
- }
- }
- i = newpos;
- Py_DECREF(repunicode);
- }
- }
+ startpos = i;
+ exc = NULL;
+ raise_encode_exception(&exc, "decimal", unicode,
+ startpos, startpos+1,
+ "invalid decimal Unicode string");
+ Py_XDECREF(exc);
+ Py_DECREF(unicode);
+ return -1;
}
/* 0-terminate the output string */
*output++ = '\0';
- Py_XDECREF(exc);
- Py_XDECREF(errorHandler);
Py_DECREF(unicode);
return 0;
-
- onError:
- Py_XDECREF(exc);
- Py_XDECREF(errorHandler);
- Py_DECREF(unicode);
- return -1;
}
/* --- Helpers ------------------------------------------------------------ */