diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-10-13 22:21:35 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-10-13 22:21:35 (GMT) |
commit | 03dab786b2f504791ac46a9f9b9db82e634efd05 (patch) | |
tree | 148e1a5f699bbdd43647370140065af1b69e78be /Objects | |
parent | b6d84832bfa1f453d3c73d6db0982aee0e6d04fe (diff) | |
download | cpython-03dab786b2f504791ac46a9f9b9db82e634efd05.zip cpython-03dab786b2f504791ac46a9f9b9db82e634efd05.tar.gz cpython-03dab786b2f504791ac46a9f9b9db82e634efd05.tar.bz2 |
Rewrite PyBytes_FromFormatV() using _PyBytesWriter API
* Add much more unit tests on PyBytes_FromFormatV()
* Remove the first loop to compute the length of the output string
* Use _PyBytesWriter to handle the bytes buffer, use overallocation
* Cleanup the code to make simpler and easier to review
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/bytesobject.c | 336 |
1 files changed, 165 insertions, 171 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index e7ab503..189673c 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -174,190 +174,184 @@ PyBytes_FromString(const char *str) PyObject * PyBytes_FromFormatV(const char *format, va_list vargs) { - va_list count; - Py_ssize_t n = 0; - const char* f; char *s; - PyObject* string; + const char *f; + const char *p; + Py_ssize_t prec; + int longflag; + int size_tflag; + /* Longest 64-bit formatted numbers: + - "18446744073709551615\0" (21 bytes) + - "-9223372036854775808\0" (21 bytes) + Decimal takes the most space (it isn't enough for octal.) + + Longest 64-bit pointer representation: + "0xffffffffffffffff\0" (19 bytes). */ + char buffer[21]; + _PyBytesWriter writer; - Py_VA_COPY(count, vargs); - /* step 1: figure out how large a buffer we need */ - for (f = format; *f; f++) { - if (*f == '%') { - const char* p = f; - while (*++f && *f != '%' && !Py_ISALPHA(*f)) - ; - - /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since - * they don't affect the amount of space we reserve. - */ - if ((*f == 'l' || *f == 'z') && - (f[1] == 'd' || f[1] == 'u')) - ++f; - - switch (*f) { - case 'c': - { - int c = va_arg(count, int); - if (c < 0 || c > 255) { - PyErr_SetString(PyExc_OverflowError, - "PyBytes_FromFormatV(): %c format " - "expects an integer in range [0; 255]"); - return NULL; - } - n++; - break; - } - case '%': - n++; - break; - case 'd': case 'u': case 'i': case 'x': - (void) va_arg(count, int); - /* 20 bytes is enough to hold a 64-bit - integer. Decimal takes the most space. - This isn't enough for octal. */ - n += 20; - break; - case 's': - s = va_arg(count, char*); - n += strlen(s); - break; - case 'p': - (void) va_arg(count, int); - /* maximum 64-bit pointer representation: - * 0xffffffffffffffff - * so 19 characters is enough. - * XXX I count 18 -- what's the extra for? - */ - n += 19; - break; - default: - /* if we stumble upon an unknown - formatting code, copy the rest of - the format string to the output - string. (we cannot just skip the - code, since there's no way to know - what's in the argument list) */ - n += strlen(p); - goto expand; - } - } else - n++; - } - expand: - /* step 2: fill the buffer */ - /* Since we've analyzed how much space we need for the worst case, - use sprintf directly instead of the slower PyOS_snprintf. */ - string = PyBytes_FromStringAndSize(NULL, n); - if (!string) + _PyBytesWriter_Init(&writer); + + s = _PyBytesWriter_Alloc(&writer, strlen(format)); + if (s == NULL) return NULL; + writer.overallocate = 1; - s = PyBytes_AsString(string); +#define WRITE_BYTES(str) \ + do { \ + s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ + if (s == NULL) \ + goto error; \ + } while (0) for (f = format; *f; f++) { - if (*f == '%') { - const char* p = f++; - Py_ssize_t i; - int longflag = 0; - int size_tflag = 0; - /* parse the width.precision part (we're only - interested in the precision value, if any) */ - n = 0; - while (Py_ISDIGIT(*f)) - n = (n*10) + *f++ - '0'; - if (*f == '.') { - f++; - n = 0; - while (Py_ISDIGIT(*f)) - n = (n*10) + *f++ - '0'; - } - while (*f && *f != '%' && !Py_ISALPHA(*f)) - f++; - /* handle the long flag, but only for %ld and %lu. - others can be added when necessary. */ - if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { - longflag = 1; - ++f; + if (*f != '%') { + *s++ = *f; + continue; + } + + p = f++; + + /* ignore the width (ex: 10 in "%10s") */ + while (Py_ISDIGIT(*f)) + f++; + + /* parse the precision (ex: 10 in "%.10s") */ + prec = 0; + if (*f == '.') { + f++; + for (; Py_ISDIGIT(*f); f++) { + prec = (prec * 10) + (*f - '0'); } - /* handle the size_t flag. */ - if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { - size_tflag = 1; - ++f; + } + + while (*f && *f != '%' && !Py_ISALPHA(*f)) + f++; + + /* handle the long flag ('l'), but only for %ld and %lu. + others can be added when necessary. */ + longflag = 0; + if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { + longflag = 1; + ++f; + } + + /* handle the size_t flag ('z'). */ + size_tflag = 0; + if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { + size_tflag = 1; + ++f; + } + + /* substract bytes preallocated for the format string + (ex: 2 for "%s") */ + writer.min_size -= (f - p + 1); + + switch (*f) { + case 'c': + { + int c = va_arg(vargs, int); + if (c < 0 || c > 255) { + PyErr_SetString(PyExc_OverflowError, + "PyBytes_FromFormatV(): %c format " + "expects an integer in range [0; 255]"); + goto error; } + writer.min_size++; + *s++ = (unsigned char)c; + break; + } - switch (*f) { - case 'c': - { - int c = va_arg(vargs, int); - /* c has been checked for overflow in the first step */ - *s++ = (unsigned char)c; - break; + case 'd': + if (longflag) + sprintf(buffer, "%ld", va_arg(vargs, long)); + else if (size_tflag) + sprintf(buffer, "%" PY_FORMAT_SIZE_T "d", + va_arg(vargs, Py_ssize_t)); + else + sprintf(buffer, "%d", va_arg(vargs, int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 'u': + if (longflag) + sprintf(buffer, "%lu", + va_arg(vargs, unsigned long)); + else if (size_tflag) + sprintf(buffer, "%" PY_FORMAT_SIZE_T "u", + va_arg(vargs, size_t)); + else + sprintf(buffer, "%u", + va_arg(vargs, unsigned int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 'i': + sprintf(buffer, "%i", va_arg(vargs, int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 'x': + sprintf(buffer, "%x", va_arg(vargs, int)); + assert(strlen(buffer) < sizeof(buffer)); + WRITE_BYTES(buffer); + break; + + case 's': + { + Py_ssize_t i; + + p = va_arg(vargs, char*); + i = strlen(p); + if (prec > 0 && i > prec) + i = prec; + s = _PyBytesWriter_WriteBytes(&writer, s, p, i); + if (s == NULL) + goto error; + break; + } + + case 'p': + sprintf(buffer, "%p", va_arg(vargs, void*)); + assert(strlen(buffer) < sizeof(buffer)); + /* %p is ill-defined: ensure leading 0x. */ + if (buffer[1] == 'X') + buffer[1] = 'x'; + else if (buffer[1] != 'x') { + memmove(buffer+2, buffer, strlen(buffer)+1); + buffer[0] = '0'; + buffer[1] = 'x'; } - case 'd': - if (longflag) - sprintf(s, "%ld", va_arg(vargs, long)); - else if (size_tflag) - sprintf(s, "%" PY_FORMAT_SIZE_T "d", - va_arg(vargs, Py_ssize_t)); - else - sprintf(s, "%d", va_arg(vargs, int)); - s += strlen(s); - break; - case 'u': - if (longflag) - sprintf(s, "%lu", - va_arg(vargs, unsigned long)); - else if (size_tflag) - sprintf(s, "%" PY_FORMAT_SIZE_T "u", - va_arg(vargs, size_t)); - else - sprintf(s, "%u", - va_arg(vargs, unsigned int)); - s += strlen(s); - break; - case 'i': - sprintf(s, "%i", va_arg(vargs, int)); - s += strlen(s); - break; - case 'x': - sprintf(s, "%x", va_arg(vargs, int)); - s += strlen(s); - break; - case 's': - p = va_arg(vargs, char*); - i = strlen(p); - if (n > 0 && i > n) - i = n; - Py_MEMCPY(s, p, i); - s += i; - break; - case 'p': - sprintf(s, "%p", va_arg(vargs, void*)); - /* %p is ill-defined: ensure leading 0x. */ - if (s[1] == 'X') - s[1] = 'x'; - else if (s[1] != 'x') { - memmove(s+2, s, strlen(s)+1); - s[0] = '0'; - s[1] = 'x'; - } - s += strlen(s); - break; - case '%': - *s++ = '%'; - break; - default: - strcpy(s, p); - s += strlen(s); - goto end; + WRITE_BYTES(buffer); + break; + + case '%': + writer.min_size++; + *s++ = '%'; + break; + + default: + if (*f == 0) { + /* fix min_size if we reached the end of the format string */ + writer.min_size++; } - } else - *s++ = *f; + + /* invalid format string: copy unformatted string and exit */ + WRITE_BYTES(p); + return _PyBytesWriter_Finish(&writer, s); + } } - end: - _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string)); - return string; +#undef WRITE_BYTES + + return _PyBytesWriter_Finish(&writer, s); + + error: + _PyBytesWriter_Dealloc(&writer); + return NULL; } PyObject * |