diff options
-rw-r--r-- | Lib/test/test_unicode.py | 6 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 12 |
2 files changed, 14 insertions, 4 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c63364a..b20f878 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1824,6 +1824,12 @@ class UnicodeTest(string_tests.CommonTest, b'123€') self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"), b'123\\u20ac') + self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"), + b'123? ') + self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"), + b'123??') + self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"), + b'123?0') def test_transform_decimal(self): from _testcapi import unicode_transformdecimaltoascii as transform_decimal diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7f079e7..16db801 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8875,22 +8875,25 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); - for (i=0; i < length; i++) { + for (i=0; i < length; ) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); int decimal; Py_ssize_t startpos, endpos; if (Py_UNICODE_ISSPACE(ch)) { *output++ = ' '; + i++; continue; } decimal = Py_UNICODE_TODECIMAL(ch); if (decimal >= 0) { *output++ = '0' + decimal; + i++; continue; } if (0 < ch && ch < 256) { *output++ = (char)ch; + i++; continue; } /* All other characters are considered unencodable */ @@ -8899,8 +8902,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, for (; endpos < length; endpos++) { ch = PyUnicode_READ(kind, data, endpos); if ((0 < ch && ch < 256) || - !Py_UNICODE_ISSPACE(ch) || - Py_UNICODE_TODECIMAL(ch)) + Py_UNICODE_ISSPACE(ch) || + 0 <= Py_UNICODE_TODECIMAL(ch)) break; } /* cache callback name lookup @@ -8924,7 +8927,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, case 2: /* replace */ for (j=startpos; j < endpos; j++) *output++ = '?'; - /* fall through */ + i = endpos; + break; case 3: /* ignore */ i = endpos; break; |