summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2013-11-19 11:54:53 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2013-11-19 11:54:53 (GMT)
commit 4a58707a340cacea6f8e6a82adfcc10a230e1185 (patch)
tree 93f42c1524db7e6ba8a60f9a1f2c656c6e0931d9
parent 4d3f109ad3d1870130816b94a1f5d6f6c1a07586 (diff)
download cpython-4a58707a340cacea6f8e6a82adfcc10a230e1185.zip
cpython-4a58707a340cacea6f8e6a82adfcc10a230e1185.tar.gz
cpython-4a58707a340cacea6f8e6a82adfcc10a230e1185.tar.bz2
Add _PyUnicodeWriter_WriteASCIIString() function
-rw-r--r-- Include/unicodeobject.h 17
-rw-r--r-- Objects/listobject.c 9
-rw-r--r-- Objects/unicodeobject.c 90
-rw-r--r-- Python/formatter_unicode.c 18
4 files changed, 96 insertions, 38 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 20ce41d..b4891e4 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -962,12 +962,20 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
Py_ssize_t end
);
+/* Append an ASCII-encoded byte string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+ const char *str, /* ASCII-encoded byte string */
+ Py_ssize_t len /* number of bytes, or -1 if unknown */
+ );
+
/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer,
- const char *str, /* latin1-encoded byte string */
- Py_ssize_t len /* length in bytes */
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+ const char *str, /* latin1-encoded byte string */
+ Py_ssize_t len /* length in bytes */
);
/* Get the value of the writer as an Unicode string. Clear the
@@ -979,6 +987,9 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+
+PyAPI_FUNC(int) _PyObject_ReprWriter(_PyUnicodeWriter *writer,
+ PyObject *v);
#endif
#ifndef Py_LIMITED_API
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 45666fd..7d5674c 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -339,19 +339,12 @@ list_repr(PyListObject *v)
{
Py_ssize_t i;
PyObject *s;
- static PyObject *sep = NULL;
_PyUnicodeWriter writer;
if (Py_SIZE(v) == 0) {
return PyUnicode_FromString("[]");
}
- if (sep == NULL) {
- sep = PyUnicode_FromString(", ");
- if (sep == NULL)
- return NULL;
- }
-
i = Py_ReprEnter((PyObject*)v);
if (i != 0) {
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
@@ -369,7 +362,7 @@ list_repr(PyListObject *v)
so must refetch the list size on each iteration. */
for (i = 0; i < Py_SIZE(v); ++i) {
if (i > 0) {
- if (_PyUnicodeWriter_WriteStr(&writer, sep) < 0)
+ if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
goto error;
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7114006..880889e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -140,9 +140,9 @@ extern "C" {
buffer where the result characters are written to. */
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
do { \
- to_type *_to = (to_type *) to; \
- const from_type *_iter = (begin); \
- const from_type *_end = (end); \
+ to_type *_to = (to_type *)(to); \
+ const from_type *_iter = (from_type *)(begin); \
+ const from_type *_end = (from_type *)(end); \
Py_ssize_t n = (_end) - (_iter); \
const from_type *_unrolled_end = \
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
@@ -2562,7 +2562,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
precision = len;
arglen = Py_MAX(precision, width);
- assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127);
if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
return NULL;
@@ -2581,8 +2580,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
writer->pos += fill;
}
- unicode_write_cstr(writer->buffer, writer->pos, buffer, len);
- writer->pos += len;
+ if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0)
+ return NULL;
break;
}
@@ -2604,11 +2603,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
len += 2;
}
- assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127);
- if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+ if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0)
return NULL;
- unicode_write_cstr(writer->buffer, writer->pos, number, len);
- writer->pos += len;
break;
}
@@ -2707,7 +2703,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
skip the code, since there's no way to know what's in the
argument list) */
len = strlen(p);
- if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1)
+ if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
return NULL;
f = p+len;
return f;
@@ -2759,10 +2755,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (*p == '\0')
writer.overallocate = 0;
- if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1)
+
+ if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
goto fail;
- unicode_write_cstr(writer.buffer, writer.pos, f, len);
- writer.pos += len;
f = p;
}
@@ -13461,7 +13456,68 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
}
int
-_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+ const char *ascii, Py_ssize_t len)
+{
+ if (len == -1)
+ len = strlen(ascii);
+
+ assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128);
+
+ if (writer->buffer == NULL && !writer->overallocate) {
+ PyObject *str;
+
+ str = _PyUnicode_FromASCII(ascii, len);
+ if (str == NULL)
+ return -1;
+
+ writer->readonly = 1;
+ writer->buffer = str;
+ _PyUnicodeWriter_Update(writer);
+ writer->pos += len;
+ return 0;
+ }
+
+ if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+ return -1;
+
+ switch (writer->kind)
+ {
+ case PyUnicode_1BYTE_KIND:
+ {
+ const Py_UCS1 *str = (const Py_UCS1 *)ascii;
+ Py_UCS1 *data = writer->data;
+
+ Py_MEMCPY(data + writer->pos, str, len);
+ break;
+ }
+ case PyUnicode_2BYTE_KIND:
+ {
+ _PyUnicode_CONVERT_BYTES(
+ Py_UCS1, Py_UCS2,
+ ascii, ascii + len,
+ (Py_UCS2 *)writer->data + writer->pos);
+ break;
+ }
+ case PyUnicode_4BYTE_KIND:
+ {
+ _PyUnicode_CONVERT_BYTES(
+ Py_UCS1, Py_UCS4,
+ ascii, ascii + len,
+ (Py_UCS4 *)writer->data + writer->pos);
+ break;
+ }
+ default:
+ assert(0);
+ }
+
+ writer->pos += len;
+ return 0;
+}
+
+int
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+ const char *str, Py_ssize_t len)
{
Py_UCS4 maxchar;
@@ -13828,12 +13884,10 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
return -1;
len = strlen(p);
if (writer) {
- if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) {
+ if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
PyMem_Free(p);
return -1;
}
- unicode_write_cstr(writer->buffer, writer->pos, p, len);
- writer->pos += len;
}
else
*p_output = _PyUnicode_FromASCII(p, len);
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index e68087f..0a3cc59 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -1053,24 +1053,24 @@ format_float_internal(PyObject *value,
n_digits += 1;
}
- /* Since there is no unicode version of PyOS_double_to_string,
- just use the 8 bit version and then convert to unicode. */
- unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
- PyMem_Free(buf);
- if (unicode_tmp == NULL)
- goto done;
-
if (format->sign != '+' && format->sign != ' '
&& format->width == -1
&& format->type != 'n'
&& !format->thousands_separators)
{
/* Fast path */
- result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp);
- Py_DECREF(unicode_tmp);
+ result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
+ PyMem_Free(buf);
return result;
}
+ /* Since there is no unicode version of PyOS_double_to_string,
+ just use the 8 bit version and then convert to unicode. */
+ unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
+ PyMem_Free(buf);
+ if (unicode_tmp == NULL)
+ goto done;
+
/* Is a sign character present in the output? If so, remember it
and skip it */
index = 0;