summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_unicode.py 6
-rw-r--r-- Objects/unicodeobject.c 12
2 files changed, 14 insertions, 4 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index c63364a..b20f878 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1824,6 +1824,12 @@ class UnicodeTest(string_tests.CommonTest,
b'123€')
self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
b'123\\u20ac')
+ self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
+ b'123? ')
+ self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
+ b'123??')
+ self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
+ b'123?0')
def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7f079e7..16db801 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8875,22 +8875,25 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
- for (i=0; i < length; i++) {
+ for (i=0; i < length; ) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
int decimal;
Py_ssize_t startpos, endpos;
if (Py_UNICODE_ISSPACE(ch)) {
*output++ = ' ';
+ i++;
continue;
}
decimal = Py_UNICODE_TODECIMAL(ch);
if (decimal >= 0) {
*output++ = '0' + decimal;
+ i++;
continue;
}
if (0 < ch && ch < 256) {
*output++ = (char)ch;
+ i++;
continue;
}
/* All other characters are considered unencodable */
@@ -8899,8 +8902,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
for (; endpos < length; endpos++) {
ch = PyUnicode_READ(kind, data, endpos);
if ((0 < ch && ch < 256) ||
- !Py_UNICODE_ISSPACE(ch) ||
- Py_UNICODE_TODECIMAL(ch))
+ Py_UNICODE_ISSPACE(ch) ||
+ 0 <= Py_UNICODE_TODECIMAL(ch))
break;
}
/* cache callback name lookup
@@ -8924,7 +8927,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
case 2: /* replace */
for (j=startpos; j < endpos; j++)
*output++ = '?';
- /* fall through */
+ i = endpos;
+ break;
case 3: /* ignore */
i = endpos;
break;