summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org> 2023-07-03 08:23:43 (GMT)
committer: GitHub <noreply@github.com> 2023-07-03 08:23:43 (GMT)
commit: 5ccbbe5bb9a659fa8f2fe551428c84cc14015f44 (patch)
tree: dda828881a98a880d128b97c1072fc08b7b262b1
parent: d65b783b6966d233467a48ef633afb4aff9d5df8 (diff)
download: cpython-5ccbbe5bb9a659fa8f2fe551428c84cc14015f44.zip
download: cpython-5ccbbe5bb9a659fa8f2fe551428c84cc14015f44.tar.gz
download: cpython-5ccbbe5bb9a659fa8f2fe551428c84cc14015f44.tar.bz2
gh-106320: Move _PyUnicodeWriter to the internal C API (#106342)
Also move _PyUnicode_FormatAdvancedWriter(). The CJK codecs (cjkcodecs.h) and multibytecodec.c now define the Py_BUILD_CORE_MODULE macro.
-rw-r--r-- Include/cpython/unicodeobject.h | 139
-rw-r--r-- Include/internal/pycore_complexobject.h | 2
-rw-r--r-- Include/internal/pycore_floatobject.h | 2
-rw-r--r-- Include/internal/pycore_unicodeobject.h | 145
-rw-r--r-- Modules/cjkcodecs/cjkcodecs.h | 4
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c | 4
-rw-r--r-- Modules/cjkcodecs/multibytecodec.h | 2
-rw-r--r-- Tools/c-analyzer/c_parser/preprocessor/gcc.py | 12
8 files changed, 166 insertions, 144 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index dee8b27..c5892a8 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -480,131 +480,6 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
Py_ssize_t start,
Py_ssize_t end);
-/* --- _PyUnicodeWriter API ----------------------------------------------- */
-
-typedef struct {
- PyObject *buffer;
- void *data;
- int kind;
- Py_UCS4 maxchar;
- Py_ssize_t size;
- Py_ssize_t pos;
-
- /* minimum number of allocated characters (default: 0) */
- Py_ssize_t min_length;
-
- /* minimum character (default: 127, ASCII) */
- Py_UCS4 min_char;
-
- /* If non-zero, overallocate the buffer (default: 0). */
- unsigned char overallocate;
-
- /* If readonly is 1, buffer is a shared string (cannot be modified)
- and size is set to 0. */
- unsigned char readonly;
-} _PyUnicodeWriter ;
-
-/* Initialize a Unicode writer.
- *
- * By default, the minimum buffer size is 0 character and overallocation is
- * disabled. Set min_length, min_char and overallocate attributes to control
- * the allocation of the buffer. */
-PyAPI_FUNC(void)
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
-
-/* Prepare the buffer to write 'length' characters
- with the specified maximum character.
-
- Return 0 on success, raise an exception and return -1 on error. */
-#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
- (((MAXCHAR) <= (WRITER)->maxchar \
- && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
- ? 0 \
- : (((LENGTH) == 0) \
- ? 0 \
- : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
-
-/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
- instead. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
- Py_ssize_t length, Py_UCS4 maxchar);
-
-/* Prepare the buffer to have at least the kind KIND.
- For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
- support characters in range U+000-U+FFFF.
-
- Return 0 on success, raise an exception and return -1 on error. */
-#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
- ((KIND) <= (WRITER)->kind \
- ? 0 \
- : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
-
-/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
- macro instead. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
- int kind);
-
-/* Append a Unicode character.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
- Py_UCS4 ch
- );
-
-/* Append a Unicode string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
- PyObject *str /* Unicode string */
- );
-
-/* Append a substring of a Unicode string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
- PyObject *str, /* Unicode string */
- Py_ssize_t start,
- Py_ssize_t end
- );
-
-/* Append an ASCII-encoded byte string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
- const char *str, /* ASCII-encoded byte string */
- Py_ssize_t len /* number of bytes, or -1 if unknown */
- );
-
-/* Append a latin1-encoded byte string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
- const char *str, /* latin1-encoded byte string */
- Py_ssize_t len /* length in bytes */
- );
-
-/* Get the value of the writer as a Unicode string. Clear the
- buffer of the writer. Raise an exception and return NULL
- on error. */
-PyAPI_FUNC(PyObject *)
-_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
-
-/* Deallocate memory of a writer (clear its internal buffer). */
-PyAPI_FUNC(void)
-_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
-
-
-/* Format the object based on the format_spec, as defined in PEP 3101
- (Advanced String Formatting). */
-PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
- _PyUnicodeWriter *writer,
- PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
-
/* --- Manage the default encoding ---------------------------------------- */
/* Returns a pointer to the default encoding (UTF-8) of the
@@ -774,20 +649,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
PyObject *sepobj
);
-/* Using explicit passed-in values, insert the thousands grouping
- into the string pointed to by buffer. For the argument descriptions,
- see Objects/stringlib/localeutil.h */
-PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
- _PyUnicodeWriter *writer,
- Py_ssize_t n_buffer,
- PyObject *digits,
- Py_ssize_t d_pos,
- Py_ssize_t n_digits,
- Py_ssize_t min_width,
- const char *grouping,
- PyObject *thousands_sep,
- Py_UCS4 *maxchar);
-
/* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and
diff --git a/Include/internal/pycore_complexobject.h b/Include/internal/pycore_complexobject.h
index fb344b7..7843c0a 100644
--- a/Include/internal/pycore_complexobject.h
+++ b/Include/internal/pycore_complexobject.h
@@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
+#include "pycore_unicodeobject.h" // _PyUnicodeWriter
+
/* Operations on complex numbers from complexmodule.c */
PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex);
diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
index 27c63bc..6abba04 100644
--- a/Include/internal/pycore_floatobject.h
+++ b/Include/internal/pycore_floatobject.h
@@ -9,6 +9,8 @@ extern "C" {
#endif
+#include "pycore_unicodeobject.h" // _PyUnicodeWriter
+
/* runtime lifecycle */
extern void _PyFloat_InitState(PyInterpreterState *);
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 1bb0f36..a8c7f19 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -14,7 +14,148 @@ extern "C" {
void _PyUnicode_ExactDealloc(PyObject *op);
Py_ssize_t _PyUnicode_InternedSize(void);
-/* runtime lifecycle */
+/* --- _PyUnicodeWriter API ----------------------------------------------- */
+
+typedef struct {
+ PyObject *buffer;
+ void *data;
+ int kind;
+ Py_UCS4 maxchar;
+ Py_ssize_t size;
+ Py_ssize_t pos;
+
+ /* minimum number of allocated characters (default: 0) */
+ Py_ssize_t min_length;
+
+ /* minimum character (default: 127, ASCII) */
+ Py_UCS4 min_char;
+
+ /* If non-zero, overallocate the buffer (default: 0). */
+ unsigned char overallocate;
+
+ /* If readonly is 1, buffer is a shared string (cannot be modified)
+ and size is set to 0. */
+ unsigned char readonly;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+ *
+ * By default, the minimum buffer size is 0 character and overallocation is
+ * disabled. Set min_length, min_char and overallocate attributes to control
+ * the allocation of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
+
+/* Prepare the buffer to write 'length' characters
+ with the specified maximum character.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
+ (((MAXCHAR) <= (WRITER)->maxchar \
+ && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
+ ? 0 \
+ : (((LENGTH) == 0) \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+ instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+ Py_ssize_t length, Py_UCS4 maxchar);
+
+/* Prepare the buffer to have at least the kind KIND.
+ For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
+ support characters in range U+000-U+FFFF.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
+ ((KIND) <= (WRITER)->kind \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
+ macro instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+ int kind);
+
+/* Append a Unicode character.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
+ Py_UCS4 ch
+ );
+
+/* Append a Unicode string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
+ PyObject *str /* Unicode string */
+ );
+
+/* Append a substring of a Unicode string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
+ PyObject *str, /* Unicode string */
+ Py_ssize_t start,
+ Py_ssize_t end
+ );
+
+/* Append an ASCII-encoded byte string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+ const char *str, /* ASCII-encoded byte string */
+ Py_ssize_t len /* number of bytes, or -1 if unknown */
+ );
+
+/* Append a latin1-encoded byte string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+ const char *str, /* latin1-encoded byte string */
+ Py_ssize_t len /* length in bytes */
+ );
+
+/* Get the value of the writer as a Unicode string. Clear the
+ buffer of the writer. Raise an exception and return NULL
+ on error. */
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+/* Deallocate memory of a writer (clear its internal buffer). */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+
+
+/* Format the object based on the format_spec, as defined in PEP 3101
+ (Advanced String Formatting). */
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
+
+/* --- Methods & Slots ---------------------------------------------------- */
+
+/* Using explicit passed-in values, insert the thousands grouping
+ into the string pointed to by buffer. For the argument descriptions,
+ see Objects/stringlib/localeutil.h */
+PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
+ _PyUnicodeWriter *writer,
+ Py_ssize_t n_buffer,
+ PyObject *digits,
+ Py_ssize_t d_pos,
+ Py_ssize_t n_digits,
+ Py_ssize_t min_width,
+ const char *grouping,
+ PyObject *thousands_sep,
+ Py_UCS4 *maxchar);
+
+/* --- Runtime lifecycle -------------------------------------------------- */
extern void _PyUnicode_InitState(PyInterpreterState *);
extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
@@ -24,7 +165,7 @@ extern void _PyUnicode_FiniTypes(PyInterpreterState *);
extern PyTypeObject _PyUnicodeASCIIIter_Type;
-/* other API */
+/* --- Other API ---------------------------------------------------------- */
struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index 48cdcfb..97290aa 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -7,6 +7,10 @@
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
#include "Python.h"
#include "multibytecodec.h"
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index cf437d0..3febd1a 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -4,6 +4,10 @@
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
#include "Python.h"
#include "structmember.h" // PyMemberDef
#include "multibytecodec.h"
diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h
index f593622..5b85e2e 100644
--- a/Modules/cjkcodecs/multibytecodec.h
+++ b/Modules/cjkcodecs/multibytecodec.h
@@ -10,6 +10,8 @@
extern "C" {
#endif
+#include "pycore_unicodeobject.h" // _PyUnicodeWriter
+
#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
index 0929f71..415a2ba 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
@@ -3,6 +3,14 @@ import re
from . import common as _common
+# Modules/socketmodule.h uses pycore_time.h which needs the Py_BUILD_CORE
+# macro. Usually it's defined by the C file which includes it.
+# Other header files have a similar issue.
+NEED_BUILD_CORE = {
+ 'cjkcodecs.h',
+ 'multibytecodec.h',
+ 'socketmodule.h',
+}
TOOL = 'gcc'
@@ -62,9 +70,7 @@ def preprocess(filename,
filename = _normpath(filename, cwd)
postargs = POST_ARGS
- if os.path.basename(filename) == 'socketmodule.h':
- # Modules/socketmodule.h uses pycore_time.h which needs Py_BUILD_CORE.
- # Usually it's defined by the C file which includes it.
+ if os.path.basename(filename) in NEED_BUILD_CORE:
postargs += ('-DPy_BUILD_CORE=1',)
text = _common.preprocess(