summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2015-09-21 22:58:32 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2015-09-21 22:58:32 (GMT)
commit ca9381ea01211e79f5bc6078b95e177f1c04f52b (patch)
tree d324c7a17fef115fc94486e76c96a031b81d1ca3
parent 5014920cb72768bc54924e55e7004e79fcad94f7 (diff)
download cpython-ca9381ea01211e79f5bc6078b95e177f1c04f52b.zip
cpython-ca9381ea01211e79f5bc6078b95e177f1c04f52b.tar.gz
cpython-ca9381ea01211e79f5bc6078b95e177f1c04f52b.tar.bz2
Issue #24870: Add _PyUnicodeWriter_PrepareKind() macro
Add a macro which ensures that the writer has at least the requested kind.
-rw-r--r-- Include/unicodeobject.h 17
-rw-r--r-- Objects/unicodeobject.c 38
2 files changed, 46 insertions, 9 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 33e8f19..d0e0142 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -942,6 +942,23 @@ PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);
+/* Ensure that the writer's buffer has at least the kind KIND.
+   For example, KIND=PyUnicode_2BYTE_KIND ensures that the writer will
+   support characters in range U+0000-U+FFFF.
+
+   If KIND is less than or equal to the writer's current kind, the macro
+   evaluates to 0 without calling _PyUnicodeWriter_PrepareKindInternal().
+   KIND must not be PyUnicode_WCHAR_KIND (checked with assert()).
+
+   WRITER and KIND each appear more than once in the expansion: pass
+   expressions without side effects.
+
+   Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
+ (assert((KIND) != PyUnicode_WCHAR_KIND), \
+ (KIND) <= (WRITER)->kind \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
+   macro instead: the macro only calls this function when KIND is strictly
+   greater than the writer's current kind.
+   Return 0 on success, -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+ enum PyUnicode_Kind kind);
+
/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f5f2d48..7c079e0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6722,14 +6722,11 @@ PyUnicode_DecodeASCII(const char *s,
case _Py_ERROR_REPLACE:
case _Py_ERROR_SURROGATEESCAPE:
/* Fast-path: the error handler only writes one character,
- but we must switch to UCS2 at the first write */
- if (kind < PyUnicode_2BYTE_KIND) {
- if (_PyUnicodeWriter_Prepare(&writer, size - writer.pos,
- 0xffff) < 0)
- return NULL;
- kind = writer.kind;
- data = writer.data;
- }
+ but we may switch to UCS2 at the first write */
+ if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0)
+ goto onError;
+ kind = writer.kind;
+ data = writer.data;
if (error_handler == _Py_ERROR_REPLACE)
PyUnicode_WRITE(kind, data, writer.pos, 0xfffd);
@@ -13309,7 +13306,8 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t newlen;
PyObject *newbuffer;
- assert(length > 0);
+ /* ensure that the _PyUnicodeWriter_Prepare macro was used */
+ assert(maxchar > writer->maxchar || length > 0);
if (length > PY_SSIZE_T_MAX - writer->pos) {
PyErr_NoMemory();
@@ -13375,6 +13373,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
#undef OVERALLOCATE_FACTOR
}
+/* Upgrade the writer's buffer so that it can store characters of at least
+   the kind KIND.
+
+   Don't call this function directly: use the _PyUnicodeWriter_PrepareKind()
+   macro, which evaluates to 0 without calling this function when the writer
+   already has a wide enough kind.
+
+   Return 0 on success, raise an exception and return -1 on error. */
+int
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+                                     enum PyUnicode_Kind kind)
+{
+    Py_UCS4 maxchar;
+
+    /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
+    assert(writer->kind < kind);
+
+    /* map the requested kind to the largest code point it can store */
+    switch (kind)
+    {
+    case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
+    case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
+    case PyUnicode_4BYTE_KIND: maxchar = 0x10ffff; break;
+    default:
+        assert(0 && "invalid kind");
+        /* assert() compiles away under NDEBUG: set an exception so that
+           the "return -1 with an exception set" contract still holds. */
+        PyErr_SetString(PyExc_SystemError, "invalid kind");
+        return -1;
+    }
+
+    /* length=0: no additional characters are requested, only a larger
+       maxchar */
+    return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
+}
+
Py_LOCAL_INLINE(int)
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
{