From ece58deb9fd72674b84ef7a01c944b5eed6b37a1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 23 Apr 2012 23:36:38 +0200 Subject: Close #14648: Compute correctly maxchar in str.format() for substrings --- Include/unicodeobject.h | 9 +++++++++ Lib/test/test_unicode.py | 10 ++++++++-- Objects/unicodeobject.c | 31 +++++++++++++++++++++++++++++++ Python/formatter_unicode.c | 6 ++---- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 8f74995..486d4fa 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -710,6 +710,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring( Py_ssize_t start, Py_ssize_t end); +#ifndef Py_LIMITED_API +/* Compute the maximum character of the substring unicode[start:end]. + Return 127 for an empty string. */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t end); +#endif + /* Copy the string into a UCS4 buffer including the null character if copy_null is set. Return NULL and raise an exception on error. Raise a ValueError if the buffer is smaller than the string. Return buffer on success. 
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 7b0397e..8468fbf 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -924,6 +924,14 @@ class UnicodeTest(string_tests.CommonTest, self.assertRaises(ValueError, format, '', '#') self.assertRaises(ValueError, format, '', '#20') + # Non-ASCII + self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"), + 'ABC\u0410\u0411\u0412') + self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"), + 'ABC') + self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"), + '') + def test_format_map(self): self.assertEqual(''.format_map({}), '') self.assertEqual('a'.format_map({}), 'a') @@ -1056,8 +1064,6 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual('%f' % INF, 'inf') self.assertEqual('%F' % INF, 'INF') - self.assertEqual(format("\u0410\u0411\u0412", "s"), "АБВ") - def test_startswith_endswith_errors(self): for meth in ('foo'.startswith, 'foo'.endswith): with self.assertRaises(TypeError) as cm: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7e73bc2..2b90cfa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) } } +Py_UCS4 +_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end) +{ + enum PyUnicode_Kind kind; + void *startptr, *endptr; + + assert(PyUnicode_IS_READY(unicode)); + assert(0 <= start); + assert(end <= PyUnicode_GET_LENGTH(unicode)); + assert(start <= end); + + if (start == 0 && end == PyUnicode_GET_LENGTH(unicode)) + return PyUnicode_MAX_CHAR_VALUE(unicode); + + if (start == end) + return 127; + + kind = PyUnicode_KIND(unicode); + startptr = PyUnicode_DATA(unicode); + endptr = (char*)startptr + end * kind; + if (start) + startptr = (char*)startptr + start * kind; + switch(kind) + { + case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr); + case PyUnicode_2BYTE_KIND: return 
ucs2lib_find_max_char(startptr, endptr); + default: + case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr); + } +} + /* Ensure that a string uses the most efficient storage, if it is not the case: create a new string with of the right kind. Write NULL into *p_unicode on error. */ diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 5e5b19f..e1c00df 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -716,7 +716,7 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) Py_ssize_t pos; Py_ssize_t len = PyUnicode_GET_LENGTH(value); PyObject *result = NULL; - Py_UCS4 maxchar = 127; + Py_UCS4 maxchar; /* sign is not allowed on strings */ if (format->sign != '\0') { @@ -747,11 +747,9 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) len = format->precision; } - if (len) - maxchar = PyUnicode_MAX_CHAR_VALUE(value); - calc_padding(len, format->width, format->align, &lpad, &rpad, &total); + maxchar = _PyUnicode_FindMaxChar(value, 0, len); if (lpad != 0 || rpad != 0) maxchar = Py_MAX(maxchar, format->fill_char); -- cgit v0.12