diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 236 |
1 files changed, 229 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7e455a5..e77b65d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -393,15 +393,9 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, return (PyObject *)unicode; } -PyObject *PyUnicode_FromString(const char *u) +PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) { PyUnicodeObject *unicode; - size_t size = strlen(u); - if (size > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, "input too long"); - return NULL; - } - /* If the Unicode data is known at construction time, we can apply some optimizations which share commonly used objects. */ if (u != NULL) { @@ -441,6 +435,17 @@ PyObject *PyUnicode_FromString(const char *u) return (PyObject *)unicode; } +PyObject *PyUnicode_FromString(const char *u) +{ + size_t size = strlen(u); + if (size > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "input too long"); + return NULL; + } + + return PyUnicode_FromStringAndSize(u, size); +} + #ifdef HAVE_WCHAR_H PyObject *PyUnicode_FromWideChar(register const wchar_t *w, @@ -473,6 +478,223 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w, return (PyObject *)unicode; } +#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;} + +PyObject * +PyUnicode_FromFormatV(const char *format, va_list vargs) +{ + va_list count; + Py_ssize_t n = 0; + const char* f; + Py_UNICODE *s; + PyObject *string; + /* used by sprintf */ + char buffer[21]; + const char *copy; + +#ifdef VA_LIST_IS_ARRAY + Py_MEMCPY(count, vargs, sizeof(va_list)); +#else +#ifdef __va_copy + __va_copy(count, vargs); +#else + count = vargs; +#endif +#endif + /* step 1: figure out how large a buffer we need */ + for (f = format; *f; f++) { + if (*f == '%') { + const char* p = f; + while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) + ; + + /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since + * they don't affect the amount of space we reserve. + */ + if ((*f == 'l' || *f == 'z') && + (f[1] == 'd' || f[1] == 'u')) + ++f; + + switch (*f) { + case 'c': + (void)va_arg(count, int); + /* fall through... */ + case '%': + n++; + break; + case 'd': case 'u': case 'i': case 'x': + (void) va_arg(count, int); + /* 20 bytes is enough to hold a 64-bit + integer. Decimal takes the most space. + This isn't enough for octal. */ + n += 20; + break; + case 's': + n += strlen(va_arg(count, char*)); + break; + case 'U': + { + PyObject *obj = va_arg(count, PyObject *); + assert(obj && PyUnicode_Check(obj)); + n += PyUnicode_GET_SIZE(obj); + break; + } + case 'p': + (void) va_arg(count, int); + /* maximum 64-bit pointer representation: + * 0xffffffffffffffff + * so 19 characters is enough. + * XXX I count 18 -- what's the extra for? + */ + n += 19; + break; + default: + /* if we stumble upon an unknown + formatting code, copy the rest of + the format string to the output + string. (we cannot just skip the + code, since there's no way to know + what's in the argument list) */ + n += strlen(p); + goto expand; + } + } else + n++; + } + expand: + /* step 2: fill the buffer */ + /* Since we've analyzed how much space we need for the worst case, + we don't have to resize the string. */ + string = PyUnicode_FromUnicode(NULL, n); + if (!string) + return NULL; + + s = PyUnicode_AS_UNICODE(string); + + for (f = format; *f; f++) { + if (*f == '%') { + const char* p = f++; + int longflag = 0; + int size_tflag = 0; + /* parse the width.precision part (we're only + interested in the precision value, if any) */ + n = 0; + while (isdigit(Py_CHARMASK(*f))) + n = (n*10) + *f++ - '0'; + if (*f == '.') { + f++; + n = 0; + while (isdigit(Py_CHARMASK(*f))) + n = (n*10) + *f++ - '0'; + } + while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) + f++; + /* handle the long flag, but only for %ld and %lu. + others can be added when necessary. */ + if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { + longflag = 1; + ++f; + } + /* handle the size_t flag. */ + if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { + size_tflag = 1; + ++f; + } + + switch (*f) { + case 'c': + *s++ = va_arg(vargs, int); + break; + case 'd': + if (longflag) + sprintf(buffer, "%ld", va_arg(vargs, long)); + else if (size_tflag) + sprintf(buffer, "%" PY_FORMAT_SIZE_T "d", + va_arg(vargs, Py_ssize_t)); + else + sprintf(buffer, "%d", va_arg(vargs, int)); + appendstring(buffer); + break; + case 'u': + if (longflag) + sprintf(buffer, "%lu", + va_arg(vargs, unsigned long)); + else if (size_tflag) + sprintf(buffer, "%" PY_FORMAT_SIZE_T "u", + va_arg(vargs, size_t)); + else + sprintf(buffer, "%u", + va_arg(vargs, unsigned int)); + appendstring(buffer); + break; + case 'i': + sprintf(buffer, "%i", va_arg(vargs, int)); + appendstring(buffer); + break; + case 'x': + sprintf(buffer, "%x", va_arg(vargs, int)); + appendstring(buffer); + break; + case 's': + p = va_arg(vargs, char*); + appendstring(p); + break; + case 'U': + { + PyObject *obj = va_arg(vargs, PyObject *); + Py_UNICODE *ucopy = PyUnicode_AS_UNICODE(obj); + Py_ssize_t usize = PyUnicode_GET_SIZE(obj); + Py_ssize_t upos; + for (upos = 0; upos<usize;) + *s++ = ucopy[upos++]; + break; + } + case 'p': + sprintf(buffer, "%p", va_arg(vargs, void*)); + /* %p is ill-defined: ensure leading 0x. */ + if (buffer[1] == 'X') + buffer[1] = 'x'; + else if (buffer[1] != 'x') { + memmove(buffer+2, buffer, strlen(buffer)+1); + buffer[0] = '0'; + buffer[1] = 'x'; + } + appendstring(buffer); + break; + case '%': + *s++ = '%'; + break; + default: + appendstring(p); + goto end; + } + } else + *s++ = *f; + } + + end: + _PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string)); + return string; +} + +#undef appendstring + +PyObject * +PyUnicode_FromFormat(const char *format, ...) +{ + PyObject* ret; + va_list vargs; + +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + ret = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); + return ret; +} + Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode, wchar_t *w, Py_ssize_t size) |