From 03dab786b2f504791ac46a9f9b9db82e634efd05 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 14 Oct 2015 00:21:35 +0200 Subject: Rewrite PyBytes_FromFormatV() using _PyBytesWriter API * Add many more unit tests for PyBytes_FromFormatV() * Remove the first loop to compute the length of the output string * Use _PyBytesWriter to handle the bytes buffer, use overallocation * Clean up the code to make it simpler and easier to review --- Lib/test/test_bytes.py | 92 ++++++++++++-- Objects/bytesobject.c | 336 ++++++++++++------------------------------------- 2 files changed, 245 insertions(+), 183 deletions(-) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 53a80f4..5fe193e 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -783,25 +783,93 @@ class BytesTest(BaseBytesTest, unittest.TestCase): # Test PyBytes_FromFormat() def test_from_format(self): test.support.import_module('ctypes') - from ctypes import pythonapi, py_object, c_int, c_char_p + _testcapi = test.support.import_module('_testcapi') + from ctypes import pythonapi, py_object + from ctypes import ( + c_int, c_uint, + c_long, c_ulong, + c_size_t, c_ssize_t, + c_char_p) + PyBytes_FromFormat = pythonapi.PyBytes_FromFormat PyBytes_FromFormat.restype = py_object + # basic tests self.assertEqual(PyBytes_FromFormat(b'format'), b'format') - + self.assertEqual(PyBytes_FromFormat(b'Hello %s !', b'world'), + b'Hello world !') + + # test formatters + self.assertEqual(PyBytes_FromFormat(b'c=%c', c_int(0)), + b'c=\0') + self.assertEqual(PyBytes_FromFormat(b'c=%c', c_int(ord('@'))), + b'c=@') + self.assertEqual(PyBytes_FromFormat(b'c=%c', c_int(255)), + b'c=\xff') + self.assertEqual(PyBytes_FromFormat(b'd=%d ld=%ld zd=%zd', + c_int(1), c_long(2), + c_size_t(3)), + b'd=1 ld=2 zd=3') + self.assertEqual(PyBytes_FromFormat(b'd=%d ld=%ld zd=%zd', + c_int(-1), c_long(-2), + c_size_t(-3)), + b'd=-1 ld=-2 zd=-3') + self.assertEqual(PyBytes_FromFormat(b'u=%u lu=%lu zu=%zu', + c_uint(123), 
c_ulong(456), + c_size_t(789)), + b'u=123 lu=456 zu=789') + self.assertEqual(PyBytes_FromFormat(b'i=%i', c_int(123)), + b'i=123') + self.assertEqual(PyBytes_FromFormat(b'i=%i', c_int(-123)), + b'i=-123') + self.assertEqual(PyBytes_FromFormat(b'x=%x', c_int(0xabc)), + b'x=abc') + self.assertEqual(PyBytes_FromFormat(b'ptr=%p', + c_char_p(0xabcdef)), + b'ptr=0xabcdef') + self.assertEqual(PyBytes_FromFormat(b's=%s', c_char_p(b'cstr')), + b's=cstr') + + # test minimum and maximum integer values + size_max = c_size_t(-1).value + for formatstr, ctypes_type, value, py_formatter in ( + (b'%d', c_int, _testcapi.INT_MIN, str), + (b'%d', c_int, _testcapi.INT_MAX, str), + (b'%ld', c_long, _testcapi.LONG_MIN, str), + (b'%ld', c_long, _testcapi.LONG_MAX, str), + (b'%lu', c_ulong, _testcapi.ULONG_MAX, str), + (b'%zd', c_ssize_t, _testcapi.PY_SSIZE_T_MIN, str), + (b'%zd', c_ssize_t, _testcapi.PY_SSIZE_T_MAX, str), + (b'%zu', c_size_t, size_max, str), + (b'%p', c_char_p, size_max, lambda value: '%#x' % value), + ): + self.assertEqual(PyBytes_FromFormat(formatstr, ctypes_type(value)), + py_formatter(value).encode('ascii')), + + # width and precision (width is currently ignored) + self.assertEqual(PyBytes_FromFormat(b'%5s', b'a'), + b'a') + self.assertEqual(PyBytes_FromFormat(b'%.3s', b'abcdef'), + b'abc') + + # '%%' formatter + self.assertEqual(PyBytes_FromFormat(b'%%'), + b'%') + self.assertEqual(PyBytes_FromFormat(b'[%%]'), + b'[%]') + self.assertEqual(PyBytes_FromFormat(b'%%%c', c_int(ord('_'))), + b'%_') + self.assertEqual(PyBytes_FromFormat(b'%%s'), + b'%s') + + # Invalid formats and partial formatting self.assertEqual(PyBytes_FromFormat(b'%'), b'%') - self.assertEqual(PyBytes_FromFormat(b'%%'), b'%') - self.assertEqual(PyBytes_FromFormat(b'%%s'), b'%s') - self.assertEqual(PyBytes_FromFormat(b'[%%]'), b'[%]') - self.assertEqual(PyBytes_FromFormat(b'%%%c', c_int(ord('_'))), b'%_') - - self.assertEqual(PyBytes_FromFormat(b'c:%c', c_int(255)), - b'c:\xff') - 
self.assertEqual(PyBytes_FromFormat(b's:%s', c_char_p(b'cstr')), - b's:cstr') + self.assertEqual(PyBytes_FromFormat(b'x=%i y=%', c_int(2), c_int(3)), + b'x=2 y=%') - # Issue #19969 + # Issue #19969: %c must raise OverflowError for values + # not in the range [0; 255] self.assertRaises(OverflowError, PyBytes_FromFormat, b'%c', c_int(-1)) self.assertRaises(OverflowError, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index e7ab503..189673c 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -174,190 +174,184 @@ PyBytes_FromString(const char *str) PyObject * PyBytes_FromFormatV(const char *format, va_list vargs) { - va_list count; - Py_ssize_t n = 0; - const char* f; char *s; - PyObject* string; + const char *f; + const char *p; + Py_ssize_t prec; + int longflag; + int size_tflag; + /* Longest 64-bit formatted numbers: + - "18446744073709551615\0" (21 bytes) + - "-9223372036854775808\0" (21 bytes) + Decimal takes the most space (it isn't enough for octal.) + + Longest 64-bit pointer representation: + "0xffffffffffffffff\0" (19 bytes). */ + char buffer[21]; + _PyBytesWriter writer; - Py_VA_COPY(count, vargs); - /* step 1: figure out how large a buffer we need */ - for (f = format; *f; f++) { - if (*f == '%') { - const char* p = f; - while (*++f && *f != '%' && !Py_ISALPHA(*f)) - ; - - /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since - * they don't affect the amount of space we reserve. - */ - if ((*f == 'l' || *f == 'z') && - (f[1] == 'd' || f[1] == 'u')) - ++f; - - switch (*f) { - case 'c': - { - int c = va_arg(count, int); - if (c < 0 || c > 255) { - PyErr_SetString(PyExc_OverflowError, - "PyBytes_FromFormatV(): %c format " - "expects an integer in range [0; 255]"); - return NULL; - } - n++; - break; - } - case '%': - n++; - break; - case 'd': case 'u': case 'i': case 'x': - (void) va_arg(count, int); - /* 20 bytes is enough to hold a 64-bit - integer. Decimal takes the most space. - This isn't enough for octal. 
*/ - n += 20; - break; - case 's': - s = va_arg(count, char*); - n += strlen(s); - break; - case 'p': - (void) va_arg(count, int); - /* maximum 64-bit pointer representation: - * 0xffffffffffffffff - * so 19 characters is enough. - * XXX I count 18 -- what's the extra for? - */ - n += 19; - break; - default: - /* if we stumble upon an unknown - formatting code, copy the rest of - the format string to the output - string. (we cannot just skip the - code, since there's no way to know - what's in the argument list) */ - n += strlen(p); - goto expand; - } - } else - n++; - } - expand: - /* step 2: fill the buffer */ - /* Since we've analyzed how much space we need for the worst case, - use sprintf directly instead of the slower PyOS_snprintf. */ - string = PyBytes_FromStringAndSize(NULL, n); - if (!string) + _PyBytesWriter_Init(&writer); + + s = _PyBytesWriter_Alloc(&writer, strlen(format)); + if (s == NULL) return NULL; + writer.overallocate = 1; - s = PyBytes_AsString(string); +#define WRITE_BYTES(str) \ + do { \ + s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ + if (s == NULL) \ + goto error; \ + } while (0) for (f = format; *f; f++) { - if (*f == '%') { - const char* p = f++; - Py_ssize_t i; - int longflag = 0; - int size_tflag = 0; - /* parse the width.precision part (we're only - interested in the precision value, if any) */ - n = 0; - while (Py_ISDIGIT(*f)) - n = (n*10) + *f++ - '0'; - if (*f == '.') { - f++; - n = 0; - while (Py_ISDIGIT(*f)) - n = (n*10) + *f++ - '0'; - } - while (*f && *f != '%' && !Py_ISALPHA(*f)) - f++; - /* handle the long flag, but only for %ld and %lu. - others can be added when necessary. 
*/ - if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { - longflag = 1; - ++f; + if (*f != '%') { + *s++ = *f; + continue; + } + + p = f++; + + /* ignore the width (ex: 10 in "%10s") */ + while (Py_ISDIGIT(*f)) + f++; + + /* parse the precision (ex: 10 in "%.10s") */ + prec = 0; + if (*f == '.') { + f++; + for (; Py_ISDIGIT(*f); f++) { + prec = (prec * 10) + (*f - '0'); } - /* handle the size_t flag. */ - if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { - size_tflag = 1; - ++f; + } + + while (*f && *f != '%' && !Py_ISALPHA(*f)) + f++; + + /* handle the long flag ('l'), but only for %ld and %lu. + others can be added when necessary. */ + longflag = 0; + if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { + longflag = 1; + ++f; + } + + /* handle the size_t flag ('z'). */ + size_tflag = 0; + if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { + size_tflag = 1; + ++f; + } + + /* subtract bytes preallocated for the format string + (ex: 2 for "%s") */ + writer.min_size -= (f - p + 1); + + switch (*f) { + case 'c': + { + int c = va_arg(vargs, int); + if (c < 0 || c > 255) { + PyErr_SetString(PyExc_OverflowError, + "PyBytes_FromFormatV(): %c format " + "expects an integer in range [0; 255]"); + goto error; } + writer.min_size++; + *s++ = (unsigned char)c; + break; + } - switch (*f) { - case 'c': - { - int c = va_arg(vargs, int); - /* c has been checked for overflow in the first step */ - *s++ = (unsigned char)c; - break; + case 'd': + if (longflag) + sprintf(buffer, "%ld", va_arg(vargs, long)); + else if (size_tflag) + sprintf(buffer, "%" PY_FORMAT_SIZE_T "d", + va_arg(vargs, Py_ssize_t)); + else + sprintf(buffer, "%d", va_arg(vargs, int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 'u': + if (longflag) + sprintf(buffer, "%lu", + va_arg(vargs, unsigned long)); + else if (size_tflag) + sprintf(buffer, "%" PY_FORMAT_SIZE_T "u", + va_arg(vargs, size_t)); + else + sprintf(buffer, "%u", + va_arg(vargs, unsigned int)); + assert(strlen(buffer) < 
sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 'i': + sprintf(buffer, "%i", va_arg(vargs, int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 'x': + sprintf(buffer, "%x", va_arg(vargs, int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 's': + { + Py_ssize_t i; + + p = va_arg(vargs, char*); + i = strlen(p); + if (prec > 0 && i > prec) + i = prec; + s = _PyBytesWriter_WriteBytes(&writer, s, p, i); + if (s == NULL) + goto error; + break; + } + + case 'p': + sprintf(buffer, "%p", va_arg(vargs, void*)); + assert(strlen(buffer) < sizeof(buffer)); + /* %p is ill-defined: ensure leading 0x. */ + if (buffer[1] == 'X') + buffer[1] = 'x'; + else if (buffer[1] != 'x') { + memmove(buffer+2, buffer, strlen(buffer)+1); + buffer[0] = '0'; + buffer[1] = 'x'; } - case 'd': - if (longflag) - sprintf(s, "%ld", va_arg(vargs, long)); - else if (size_tflag) - sprintf(s, "%" PY_FORMAT_SIZE_T "d", - va_arg(vargs, Py_ssize_t)); - else - sprintf(s, "%d", va_arg(vargs, int)); - s += strlen(s); - break; - case 'u': - if (longflag) - sprintf(s, "%lu", - va_arg(vargs, unsigned long)); - else if (size_tflag) - sprintf(s, "%" PY_FORMAT_SIZE_T "u", - va_arg(vargs, size_t)); - else - sprintf(s, "%u", - va_arg(vargs, unsigned int)); - s += strlen(s); - break; - case 'i': - sprintf(s, "%i", va_arg(vargs, int)); - s += strlen(s); - break; - case 'x': - sprintf(s, "%x", va_arg(vargs, int)); - s += strlen(s); - break; - case 's': - p = va_arg(vargs, char*); - i = strlen(p); - if (n > 0 && i > n) - i = n; - Py_MEMCPY(s, p, i); - s += i; - break; - case 'p': - sprintf(s, "%p", va_arg(vargs, void*)); - /* %p is ill-defined: ensure leading 0x. 
*/ - if (s[1] == 'X') - s[1] = 'x'; - else if (s[1] != 'x') { - memmove(s+2, s, strlen(s)+1); - s[0] = '0'; - s[1] = 'x'; - } - s += strlen(s); - break; - case '%': - *s++ = '%'; - break; - default: - strcpy(s, p); - s += strlen(s); - goto end; + WRITE_BYTES(buffer); + break; + + case '%': + writer.min_size++; + *s++ = '%'; + break; + + default: + if (*f == 0) { + /* fix min_size if we reached the end of the format string */ + writer.min_size++; } - } else - *s++ = *f; + + /* invalid format string: copy unformatted string and exit */ + WRITE_BYTES(p); + return _PyBytesWriter_Finish(&writer, s); + } } - end: - _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string)); - return string; +#undef WRITE_BYTES + + return _PyBytesWriter_Finish(&writer, s); + + error: + _PyBytesWriter_Dealloc(&writer); + return NULL; } PyObject * -- cgit v0.12