summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c236
1 files changed, 229 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7e455a5..e77b65d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -393,15 +393,9 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
return (PyObject *)unicode;
}
-PyObject *PyUnicode_FromString(const char *u)
+PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
{
PyUnicodeObject *unicode;
- size_t size = strlen(u);
- if (size > PY_SSIZE_T_MAX) {
- PyErr_SetString(PyExc_OverflowError, "input too long");
- return NULL;
- }
-
/* If the Unicode data is known at construction time, we can apply
some optimizations which share commonly used objects. */
if (u != NULL) {
@@ -441,6 +435,17 @@ PyObject *PyUnicode_FromString(const char *u)
return (PyObject *)unicode;
}
+PyObject *PyUnicode_FromString(const char *u)
+{
+ size_t size = strlen(u);
+ if (size > PY_SSIZE_T_MAX) {
+ PyErr_SetString(PyExc_OverflowError, "input too long");
+ return NULL;
+ }
+
+ return PyUnicode_FromStringAndSize(u, size);
+}
+
#ifdef HAVE_WCHAR_H
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
@@ -473,6 +478,223 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
return (PyObject *)unicode;
}
+#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
+
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+ va_list count;
+ Py_ssize_t n = 0;
+ const char* f;
+ Py_UNICODE *s;
+ PyObject *string;
+ /* used by sprintf */
+ char buffer[21];
+ const char *copy;
+
+#ifdef VA_LIST_IS_ARRAY
+ Py_MEMCPY(count, vargs, sizeof(va_list));
+#else
+#ifdef __va_copy
+ __va_copy(count, vargs);
+#else
+ count = vargs;
+#endif
+#endif
+ /* step 1: figure out how large a buffer we need */
+ for (f = format; *f; f++) {
+ if (*f == '%') {
+ const char* p = f;
+ while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
+ ;
+
+ /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
+ * they don't affect the amount of space we reserve.
+ */
+ if ((*f == 'l' || *f == 'z') &&
+ (f[1] == 'd' || f[1] == 'u'))
+ ++f;
+
+ switch (*f) {
+ case 'c':
+ (void)va_arg(count, int);
+ /* fall through... */
+ case '%':
+ n++;
+ break;
+ case 'd': case 'u': case 'i': case 'x':
+ (void) va_arg(count, int);
+ /* 20 bytes is enough to hold a 64-bit
+ integer. Decimal takes the most space.
+ This isn't enough for octal. */
+ n += 20;
+ break;
+ case 's':
+ n += strlen(va_arg(count, char*));
+ break;
+ case 'U':
+ {
+ PyObject *obj = va_arg(count, PyObject *);
+ assert(obj && PyUnicode_Check(obj));
+ n += PyUnicode_GET_SIZE(obj);
+ break;
+ }
+ case 'p':
+ (void) va_arg(count, int);
+ /* maximum 64-bit pointer representation:
+ * 0xffffffffffffffff
+ * so 19 characters is enough.
+ * XXX I count 18 -- what's the extra for?
+ */
+ n += 19;
+ break;
+ default:
+ /* if we stumble upon an unknown
+ formatting code, copy the rest of
+ the format string to the output
+ string. (we cannot just skip the
+ code, since there's no way to know
+ what's in the argument list) */
+ n += strlen(p);
+ goto expand;
+ }
+ } else
+ n++;
+ }
+ expand:
+ /* step 2: fill the buffer */
+ /* Since we've analyzed how much space we need for the worst case,
+ we don't have to resize the string. */
+ string = PyUnicode_FromUnicode(NULL, n);
+ if (!string)
+ return NULL;
+
+ s = PyUnicode_AS_UNICODE(string);
+
+ for (f = format; *f; f++) {
+ if (*f == '%') {
+ const char* p = f++;
+ int longflag = 0;
+ int size_tflag = 0;
+ /* parse the width.precision part (we're only
+ interested in the precision value, if any) */
+ n = 0;
+ while (isdigit(Py_CHARMASK(*f)))
+ n = (n*10) + *f++ - '0';
+ if (*f == '.') {
+ f++;
+ n = 0;
+ while (isdigit(Py_CHARMASK(*f)))
+ n = (n*10) + *f++ - '0';
+ }
+ while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
+ f++;
+ /* handle the long flag, but only for %ld and %lu.
+ others can be added when necessary. */
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
+ longflag = 1;
+ ++f;
+ }
+ /* handle the size_t flag. */
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+ size_tflag = 1;
+ ++f;
+ }
+
+ switch (*f) {
+ case 'c':
+ *s++ = va_arg(vargs, int);
+ break;
+ case 'd':
+ if (longflag)
+ sprintf(buffer, "%ld", va_arg(vargs, long));
+ else if (size_tflag)
+ sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
+ va_arg(vargs, Py_ssize_t));
+ else
+ sprintf(buffer, "%d", va_arg(vargs, int));
+ appendstring(buffer);
+ break;
+ case 'u':
+ if (longflag)
+ sprintf(buffer, "%lu",
+ va_arg(vargs, unsigned long));
+ else if (size_tflag)
+ sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
+ va_arg(vargs, size_t));
+ else
+ sprintf(buffer, "%u",
+ va_arg(vargs, unsigned int));
+ appendstring(buffer);
+ break;
+ case 'i':
+ sprintf(buffer, "%i", va_arg(vargs, int));
+ appendstring(buffer);
+ break;
+ case 'x':
+ sprintf(buffer, "%x", va_arg(vargs, int));
+ appendstring(buffer);
+ break;
+ case 's':
+ p = va_arg(vargs, char*);
+ appendstring(p);
+ break;
+ case 'U':
+ {
+ PyObject *obj = va_arg(vargs, PyObject *);
+ Py_UNICODE *ucopy = PyUnicode_AS_UNICODE(obj);
+ Py_ssize_t usize = PyUnicode_GET_SIZE(obj);
+ Py_ssize_t upos;
+ for (upos = 0; upos<usize;)
+ *s++ = ucopy[upos++];
+ break;
+ }
+ case 'p':
+ sprintf(buffer, "%p", va_arg(vargs, void*));
+ /* %p is ill-defined: ensure leading 0x. */
+ if (buffer[1] == 'X')
+ buffer[1] = 'x';
+ else if (buffer[1] != 'x') {
+ memmove(buffer+2, buffer, strlen(buffer)+1);
+ buffer[0] = '0';
+ buffer[1] = 'x';
+ }
+ appendstring(buffer);
+ break;
+ case '%':
+ *s++ = '%';
+ break;
+ default:
+ appendstring(p);
+ goto end;
+ }
+ } else
+ *s++ = *f;
+ }
+
+ end:
+ _PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
+ return string;
+}
+
+#undef appendstring
+
+PyObject *
+PyUnicode_FromFormat(const char *format, ...)
+{
+ PyObject* ret;
+ va_list vargs;
+
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ ret = PyUnicode_FromFormatV(format, vargs);
+ va_end(vargs);
+ return ret;
+}
+
Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
wchar_t *w,
Py_ssize_t size)