From 08a81df05004147ee174ece645679576ab867860 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Jan 2019 09:22:52 +0200 Subject: bpo-33817: Fix _PyString_Resize() and _PyUnicode_Resize() for empty strings. (GH-11515) --- Lib/test/test_str.py | 100 ++++++++++++++++++++- Lib/test/test_unicode.py | 6 ++ .../C API/2019-01-11-11-16-36.bpo-33817.qyYxjw.rst | 4 + Objects/stringobject.c | 22 ++++- Objects/unicodeobject.c | 19 +++- 5 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2019-01-11-11-16-36.bpo-33817.qyYxjw.rst diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 8b306f4..73ed542 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -474,8 +474,106 @@ class StrTest( self.assertEqual('lhs %% %r' % SubclassedStr('rhs'), "Success, self.__rmod__('lhs %% %r') was called") + +class CAPITest(unittest.TestCase): + + # Test PyString_FromFormat() + def test_from_format(self): + ctypes = test_support.import_module('ctypes') + _testcapi = test_support.import_module('_testcapi') + from ctypes import pythonapi, py_object + from ctypes import ( + c_int, c_uint, + c_long, c_ulong, + c_size_t, c_ssize_t, + c_char_p) + + PyString_FromFormat = pythonapi.PyString_FromFormat + PyString_FromFormat.restype = py_object + + # basic tests + self.assertEqual(PyString_FromFormat(b'format'), + b'format') + self.assertEqual(PyString_FromFormat(b'Hello %s !', b'world'), + b'Hello world !') + + # test formatters + self.assertEqual(PyString_FromFormat(b'c=%c', c_int(0)), + b'c=\0') + self.assertEqual(PyString_FromFormat(b'c=%c', c_int(ord('@'))), + b'c=@') + self.assertEqual(PyString_FromFormat(b'c=%c', c_int(255)), + b'c=\xff') + self.assertEqual(PyString_FromFormat(b'd=%d ld=%ld zd=%zd', + c_int(1), c_long(2), + c_size_t(3)), + b'd=1 ld=2 zd=3') + self.assertEqual(PyString_FromFormat(b'd=%d ld=%ld zd=%zd', + c_int(-1), c_long(-2), + c_size_t(-3)), + b'd=-1 ld=-2 zd=-3') + self.assertEqual(PyString_FromFormat(b'u=%u 
lu=%lu zu=%zu', + c_uint(123), c_ulong(456), + c_size_t(789)), + b'u=123 lu=456 zu=789') + self.assertEqual(PyString_FromFormat(b'i=%i', c_int(123)), + b'i=123') + self.assertEqual(PyString_FromFormat(b'i=%i', c_int(-123)), + b'i=-123') + self.assertEqual(PyString_FromFormat(b'x=%x', c_int(0xabc)), + b'x=abc') + + self.assertEqual(PyString_FromFormat(b's=%s', c_char_p(b'cstr')), + b's=cstr') + + # test minimum and maximum integer values + size_max = c_size_t(-1).value + for formatstr, ctypes_type, value, py_formatter in ( + (b'%d', c_int, _testcapi.INT_MIN, str), + (b'%d', c_int, _testcapi.INT_MAX, str), + (b'%ld', c_long, _testcapi.LONG_MIN, str), + (b'%ld', c_long, _testcapi.LONG_MAX, str), + (b'%lu', c_ulong, _testcapi.ULONG_MAX, str), + (b'%zd', c_ssize_t, _testcapi.PY_SSIZE_T_MIN, str), + (b'%zd', c_ssize_t, _testcapi.PY_SSIZE_T_MAX, str), + (b'%zu', c_size_t, size_max, str), + ): + self.assertEqual(PyString_FromFormat(formatstr, ctypes_type(value)), + py_formatter(value).encode('ascii')), + + # width and precision (width is currently ignored) + self.assertEqual(PyString_FromFormat(b'%5s', b'a'), + b'a') + self.assertEqual(PyString_FromFormat(b'%.3s', b'abcdef'), + b'abc') + + # '%%' formatter + self.assertEqual(PyString_FromFormat(b'%%'), + b'%') + self.assertEqual(PyString_FromFormat(b'[%%]'), + b'[%]') + self.assertEqual(PyString_FromFormat(b'%%%c', c_int(ord('_'))), + b'%_') + self.assertEqual(PyString_FromFormat(b'%%s'), + b'%s') + + # Invalid formats and partial formatting + self.assertEqual(PyString_FromFormat(b'%'), b'%') + self.assertEqual(PyString_FromFormat(b'x=%i y=%', c_int(2), c_int(3)), + b'x=2 y=%') + + self.assertEqual(PyString_FromFormat(b'%c', c_int(-1)), b'\xff') + self.assertEqual(PyString_FromFormat(b'%c', c_int(256)), b'\0') + + # Issue #33817: empty strings + self.assertEqual(PyString_FromFormat(b''), + b'') + self.assertEqual(PyString_FromFormat(b'%s', b''), + b'') + + def test_main(): - test_support.run_unittest(StrTest) + 
test_support.run_unittest(StrTest, CAPITest) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 560b84c..92476f6 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1824,6 +1824,12 @@ class CAPITest(unittest.TestCase): check_format(u'%s', b'%.%s', b'abc') + # Issue #33817: empty strings + check_format(u'', + b'') + check_format(u'', + b'%s', b'') + @test_support.cpython_only def test_encode_decimal(self): from _testcapi import unicode_encodedecimal diff --git a/Misc/NEWS.d/next/C API/2019-01-11-11-16-36.bpo-33817.qyYxjw.rst b/Misc/NEWS.d/next/C API/2019-01-11-11-16-36.bpo-33817.qyYxjw.rst new file mode 100644 index 0000000..c7360ec --- /dev/null +++ b/Misc/NEWS.d/next/C API/2019-01-11-11-16-36.bpo-33817.qyYxjw.rst @@ -0,0 +1,4 @@ +Fixed :c:func:`_PyString_Resize` and :c:func:`_PyUnicode_Resize` for empty +strings. This also fixed :c:func:`PyString_FromFormat` and +:c:func:`PyUnicode_FromFormat` when they return an empty string (e.g. +``PyString_FromFormat("%s", "")``). diff --git a/Objects/stringobject.c b/Objects/stringobject.c index b21afb4..efb0d14 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3893,13 +3893,31 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize) register PyObject *v; register PyStringObject *sv; v = *pv; - if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 || - PyString_CHECK_INTERNED(v)) { + if (!PyString_Check(v) || newsize < 0) { *pv = 0; Py_DECREF(v); PyErr_BadInternalCall(); return -1; } + if (Py_SIZE(v) == 0) { + if (newsize == 0) { + return 0; + } + *pv = PyString_FromStringAndSize(NULL, newsize); + Py_DECREF(v); + return (*pv == NULL) ? -1 : 0; + } + if (Py_REFCNT(v) != 1 || PyString_CHECK_INTERNED(v)) { + *pv = 0; + Py_DECREF(v); + PyErr_BadInternalCall(); + return -1; + } + if (newsize == 0) { + *pv = PyString_FromStringAndSize(NULL, 0); + Py_DECREF(v); + return (*pv == NULL) ? 
-1 : 0; + } /* XXX UNREF/NEWREF interface should be more symmetrical */ _Py_DEC_REFTOTAL; _Py_ForgetReference(v); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 21d994c..a859fa0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -421,10 +421,27 @@ int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length) return -1; } v = *unicode; - if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) { + if (v == NULL || !PyUnicode_Check(v) || length < 0) { PyErr_BadInternalCall(); return -1; } + if (v->length == 0) { + if (length == 0) { + return 0; + } + *unicode = _PyUnicode_New(length); + Py_DECREF(v); + return (*unicode == NULL) ? -1 : 0; + } + if (Py_REFCNT(v) != 1) { + PyErr_BadInternalCall(); + return -1; + } + if (length == 0) { + *unicode = _PyUnicode_New(0); + Py_DECREF(v); + return (*unicode == NULL) ? -1 : 0; + } /* Resizing unicode_empty and single character objects is not possible since these are being shared. We simply return a fresh -- cgit v0.12