diff options
author | Victor Stinner <vstinner@python.org> | 2023-07-03 08:23:43 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-03 08:23:43 (GMT) |
commit | 5ccbbe5bb9a659fa8f2fe551428c84cc14015f44 (patch) | |
tree | dda828881a98a880d128b97c1072fc08b7b262b1 | |
parent | d65b783b6966d233467a48ef633afb4aff9d5df8 (diff) | |
download | cpython-5ccbbe5bb9a659fa8f2fe551428c84cc14015f44.zip cpython-5ccbbe5bb9a659fa8f2fe551428c84cc14015f44.tar.gz cpython-5ccbbe5bb9a659fa8f2fe551428c84cc14015f44.tar.bz2 |
gh-106320: Move _PyUnicodeWriter to the internal C API (#106342)
Move also _PyUnicode_FormatAdvancedWriter().
CJK codecs and multibytecodec.c now define the Py_BUILD_CORE_MODULE
macro.
-rw-r--r-- | Include/cpython/unicodeobject.h | 139 | ||||
-rw-r--r-- | Include/internal/pycore_complexobject.h | 2 | ||||
-rw-r--r-- | Include/internal/pycore_floatobject.h | 2 | ||||
-rw-r--r-- | Include/internal/pycore_unicodeobject.h | 145 | ||||
-rw-r--r-- | Modules/cjkcodecs/cjkcodecs.h | 4 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 4 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.h | 2 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor/gcc.py | 12 |
8 files changed, 166 insertions, 144 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index dee8b27..c5892a8 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -480,131 +480,6 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( Py_ssize_t start, Py_ssize_t end); -/* --- _PyUnicodeWriter API ----------------------------------------------- */ - -typedef struct { - PyObject *buffer; - void *data; - int kind; - Py_UCS4 maxchar; - Py_ssize_t size; - Py_ssize_t pos; - - /* minimum number of allocated characters (default: 0) */ - Py_ssize_t min_length; - - /* minimum character (default: 127, ASCII) */ - Py_UCS4 min_char; - - /* If non-zero, overallocate the buffer (default: 0). */ - unsigned char overallocate; - - /* If readonly is 1, buffer is a shared string (cannot be modified) - and size is set to 0. */ - unsigned char readonly; -} _PyUnicodeWriter ; - -/* Initialize a Unicode writer. - * - * By default, the minimum buffer size is 0 character and overallocation is - * disabled. Set min_length, min_char and overallocate attributes to control - * the allocation of the buffer. */ -PyAPI_FUNC(void) -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); - -/* Prepare the buffer to write 'length' characters - with the specified maximum character. - - Return 0 on success, raise an exception and return -1 on error. */ -#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ - (((MAXCHAR) <= (WRITER)->maxchar \ - && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ - ? 0 \ - : (((LENGTH) == 0) \ - ? 0 \ - : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) - -/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro - instead. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar); - -/* Prepare the buffer to have at least the kind KIND. - For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will - support characters in range U+000-U+FFFF. - - Return 0 on success, raise an exception and return -1 on error. */ -#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ - ((KIND) <= (WRITER)->kind \ - ? 0 \ - : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) - -/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() - macro instead. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, - int kind); - -/* Append a Unicode character. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, - Py_UCS4 ch - ); - -/* Append a Unicode string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, - PyObject *str /* Unicode string */ - ); - -/* Append a substring of a Unicode string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, - PyObject *str, /* Unicode string */ - Py_ssize_t start, - Py_ssize_t end - ); - -/* Append an ASCII-encoded byte string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, - const char *str, /* ASCII-encoded byte string */ - Py_ssize_t len /* number of bytes, or -1 if unknown */ - ); - -/* Append a latin1-encoded byte string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, - const char *str, /* latin1-encoded byte string */ - Py_ssize_t len /* length in bytes */ - ); - -/* Get the value of the writer as a Unicode string. Clear the - buffer of the writer. Raise an exception and return NULL - on error. */ -PyAPI_FUNC(PyObject *) -_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); - -/* Deallocate memory of a writer (clear its internal buffer). */ -PyAPI_FUNC(void) -_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); - - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( - _PyUnicodeWriter *writer, - PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); - /* --- Manage the default encoding ---------------------------------------- */ /* Returns a pointer to the default encoding (UTF-8) of the @@ -774,20 +649,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( PyObject *sepobj ); -/* Using explicit passed-in values, insert the thousands grouping - into the string pointed to by buffer. For the argument descriptions, - see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( - _PyUnicodeWriter *writer, - Py_ssize_t n_buffer, - PyObject *digits, - Py_ssize_t d_pos, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - PyObject *thousands_sep, - Py_UCS4 *maxchar); - /* === Characters Type APIs =============================================== */ /* These should not be used directly. Use the Py_UNICODE_IS* and diff --git a/Include/internal/pycore_complexobject.h b/Include/internal/pycore_complexobject.h index fb344b7..7843c0a 100644 --- a/Include/internal/pycore_complexobject.h +++ b/Include/internal/pycore_complexobject.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_unicodeobject.h" // _PyUnicodeWriter + /* Operations on complex numbers from complexmodule.c */ PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex); diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index 27c63bc..6abba04 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -9,6 +9,8 @@ extern "C" { #endif +#include "pycore_unicodeobject.h" // _PyUnicodeWriter + /* runtime lifecycle */ extern void _PyFloat_InitState(PyInterpreterState *); diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 1bb0f36..a8c7f19 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -14,7 +14,148 @@ extern "C" { void _PyUnicode_ExactDealloc(PyObject *op); Py_ssize_t _PyUnicode_InternedSize(void); -/* runtime lifecycle */ +/* --- _PyUnicodeWriter API ----------------------------------------------- */ + +typedef struct { + PyObject *buffer; + void *data; + int kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + + /* minimum number of allocated characters (default: 0) */ + Py_ssize_t min_length; + + /* minimum character (default: 127, ASCII) */ + Py_UCS4 min_char; + + /* If non-zero, overallocate the buffer (default: 0). */ + unsigned char overallocate; + + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly; +} _PyUnicodeWriter ; + +/* Initialize a Unicode writer. + * + * By default, the minimum buffer size is 0 character and overallocation is + * disabled. Set min_length, min_char and overallocate attributes to control + * the allocation of the buffer. */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +/* Prepare the buffer to have at least the kind KIND. + For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will + support characters in range U+000-U+FFFF. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ + ((KIND) <= (WRITER)->kind \ + ? 0 \ + : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) + +/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() + macro instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + int kind); + +/* Append a Unicode character. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, + Py_UCS4 ch + ); + +/* Append a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, + PyObject *str /* Unicode string */ + ); + +/* Append a substring of a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, + PyObject *str, /* Unicode string */ + Py_ssize_t start, + Py_ssize_t end + ); + +/* Append an ASCII-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *str, /* ASCII-encoded byte string */ + Py_ssize_t len /* number of bytes, or -1 if unknown */ + ); + +/* Append a latin1-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, /* latin1-encoded byte string */ + Py_ssize_t len /* length in bytes */ + ); + +/* Get the value of the writer as a Unicode string. Clear the + buffer of the writer. Raise an exception and return NULL + on error. */ +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +/* Deallocate memory of a writer (clear its internal buffer). */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); + + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); + +/* --- Methods & Slots ---------------------------------------------------- */ + +/* Using explicit passed-in values, insert the thousands grouping + into the string pointed to by buffer. For the argument descriptions, + see Objects/stringlib/localeutil.h */ +PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( + _PyUnicodeWriter *writer, + Py_ssize_t n_buffer, + PyObject *digits, + Py_ssize_t d_pos, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + PyObject *thousands_sep, + Py_UCS4 *maxchar); + +/* --- Runtime lifecycle -------------------------------------------------- */ extern void _PyUnicode_InitState(PyInterpreterState *); extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *); @@ -24,7 +165,7 @@ extern void _PyUnicode_FiniTypes(PyInterpreterState *); extern PyTypeObject _PyUnicodeASCIIIter_Type; -/* other API */ +/* --- Other API ---------------------------------------------------------- */ struct _Py_unicode_runtime_ids { PyThread_type_lock lock; diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 48cdcfb..97290aa 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -7,6 +7,10 @@ #ifndef _CJKCODECS_H_ #define _CJKCODECS_H_ +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + #include "Python.h" #include "multibytecodec.h" diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index cf437d0..3febd1a 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -4,6 +4,10 @@ * Written by Hye-Shik Chang <perky@FreeBSD.org> */ +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + #include "Python.h" #include "structmember.h" // PyMemberDef #include "multibytecodec.h" diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index f593622..5b85e2e 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -10,6 +10,8 @@ extern "C" { #endif +#include "pycore_unicodeobject.h" // _PyUnicodeWriter + #ifdef uint16_t typedef uint16_t ucs2_t, DBCHAR; #else diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index 0929f71..415a2ba 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -3,6 +3,14 @@ import re from . import common as _common +# Modules/socketmodule.h uses pycore_time.h which needs the Py_BUILD_CORE +# macro. Usually it's defined by the C file which includes it. +# Other header files have a similar issue. +NEED_BUILD_CORE = { + 'cjkcodecs.h', + 'multibytecodec.h', + 'socketmodule.h', +} TOOL = 'gcc' @@ -62,9 +70,7 @@ def preprocess(filename, filename = _normpath(filename, cwd) postargs = POST_ARGS - if os.path.basename(filename) == 'socketmodule.h': - # Modules/socketmodule.h uses pycore_time.h which needs Py_BUILD_CORE. - # Usually it's defined by the C file which includes it. + if os.path.basename(filename) in NEED_BUILD_CORE: postargs += ('-DPy_BUILD_CORE=1',) text = _common.preprocess( |