summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-04-23 21:36:38 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-04-23 21:36:38 (GMT)
commitece58deb9fd72674b84ef7a01c944b5eed6b37a1 (patch)
tree040cbeefd99fc35b73a9a9505efb58a61177eda4
parent0b7d7c95448e157d4376751add831aecbd53808e (diff)
downloadcpython-ece58deb9fd72674b84ef7a01c944b5eed6b37a1.zip
cpython-ece58deb9fd72674b84ef7a01c944b5eed6b37a1.tar.gz
cpython-ece58deb9fd72674b84ef7a01c944b5eed6b37a1.tar.bz2
Close #14648: Compute correctly maxchar in str.format() for substring
-rw-r--r--Include/unicodeobject.h9
-rw-r--r--Lib/test/test_unicode.py10
-rw-r--r--Objects/unicodeobject.c31
-rw-r--r--Python/formatter_unicode.c6
4 files changed, 50 insertions, 6 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 8f74995..486d4fa 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -710,6 +710,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring(
Py_ssize_t start,
Py_ssize_t end);
+#ifndef Py_LIMITED_API
+/* Compute the maximum character of the substring unicode[start:end].
+ Return 127 for an empty string. */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
+ PyObject *unicode, /* must be a ready string (asserted by the implementation) */
+ Py_ssize_t start, /* first index of the slice; 0 <= start <= end (asserted) */
+ Py_ssize_t end); /* one past the last index; end <= length of unicode (asserted) */
+#endif
+
/* Copy the string into a UCS4 buffer including the null character if copy_null
is set. Return NULL and raise an exception on error. Raise a ValueError if
the buffer is smaller than the string. Return buffer on success.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 7b0397e..8468fbf 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -924,6 +924,14 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(ValueError, format, '', '#')
self.assertRaises(ValueError, format, '', '#20')
+ # Non-ASCII
+ self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
+ 'ABC\u0410\u0411\u0412')
+ self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
+ 'ABC')
+ self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
+ '')
+
def test_format_map(self):
self.assertEqual(''.format_map({}), '')
self.assertEqual('a'.format_map({}), 'a')
@@ -1056,8 +1064,6 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('%f' % INF, 'inf')
self.assertEqual('%F' % INF, 'INF')
- self.assertEqual(format("\u0410\u0411\u0412", "s"), "АБВ")
-
def test_startswith_endswith_errors(self):
for meth in ('foo'.startswith, 'foo'.endswith):
with self.assertRaises(TypeError) as cm:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7e73bc2..2b90cfa 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
}
}
+Py_UCS4 /* maximum character of the slice unicode[start:end] */
+_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
+{
+ enum PyUnicode_Kind kind;
+ void *startptr, *endptr; /* bounds of the slice inside the string's data buffer */
+
+ assert(PyUnicode_IS_READY(unicode)); /* precondition: the string must be ready */
+ assert(0 <= start); /* the bounds are only asserted, never clamped */
+ assert(end <= PyUnicode_GET_LENGTH(unicode));
+ assert(start <= end);
+
+ if (start == 0 && end == PyUnicode_GET_LENGTH(unicode)) /* slice is the whole string */
+ return PyUnicode_MAX_CHAR_VALUE(unicode); /* answered from the string itself, no slice scan */
+
+ if (start == end) /* empty slice: nothing to scan */
+ return 127;
+
+ kind = PyUnicode_KIND(unicode);
+ startptr = PyUnicode_DATA(unicode);
+ endptr = (char*)startptr + end * kind; /* kind is used here as the byte width of one character */
+ if (start) /* advance only when the slice does not begin at index 0 */
+ startptr = (char*)startptr + start * kind;
+ switch(kind) /* dispatch to the scanner matching the storage width */
+ {
+ case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr);
+ case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr);
+ default: /* any other kind value falls through to the 4-byte scanner */
+ case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr);
+ }
+}
+
/* Ensure that a string uses the most efficient storage, if it is not the
case: create a new string with of the right kind. Write NULL into *p_unicode
on error. */
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index 5e5b19f..e1c00df 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -716,7 +716,7 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
Py_ssize_t pos;
Py_ssize_t len = PyUnicode_GET_LENGTH(value);
PyObject *result = NULL;
- Py_UCS4 maxchar = 127;
+ Py_UCS4 maxchar;
/* sign is not allowed on strings */
if (format->sign != '\0') {
@@ -747,11 +747,9 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
len = format->precision;
}
- if (len)
- maxchar = PyUnicode_MAX_CHAR_VALUE(value);
-
calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
+ maxchar = _PyUnicode_FindMaxChar(value, 0, len);
if (lpad != 0 || rpad != 0)
maxchar = Py_MAX(maxchar, format->fill_char);