From 47e1afd2a1793b5818a16c41307a4ce976331649 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 26 Oct 2020 16:43:47 +0100 Subject: bpo-1635741: _PyUnicode_Name_CAPI moves to internal C API (GH-22713) The private _PyUnicode_Name_CAPI structure of the PyCapsule API unicodedata.ucnhash_CAPI moves to the internal C API. Moreover, the structure gets a new state member which must be passed to the getcode() and getname() functions. * Move Include/ucnhash.h to Include/internal/pycore_ucnhash.h * unicodedata module is now built with Py_BUILD_CORE_MODULE. * unicodedata: move hashAPI variable into unicodedata_module_state. --- Doc/whatsnew/3.10.rst | 6 +++ Include/internal/pycore_ucnhash.h | 44 ++++++++++++++++++++++ Include/ucnhash.h | 36 ------------------ Makefile.pre.in | 2 +- .../2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst | 4 ++ Modules/Setup | 2 +- Modules/unicodedata.c | 28 +++++++------- Objects/unicodeobject.c | 31 +++++++-------- PCbuild/pythoncore.vcxproj | 2 +- PCbuild/pythoncore.vcxproj.filters | 6 +-- Python/codecs.c | 17 +++++---- setup.py | 3 +- 12 files changed, 103 insertions(+), 78 deletions(-) create mode 100644 Include/internal/pycore_ucnhash.h delete mode 100644 Include/ucnhash.h create mode 100644 Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 6206c94..581d3a5 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -407,6 +407,12 @@ Porting to Python 3.10 Unicode object without initial data. (Contributed by Inada Naoki in :issue:`36346`.) +* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API + ``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, + the structure gets a new ``state`` member which must be passed to the + ``getcode()`` and ``getname()`` functions. + (Contributed by Victor Stinner in :issue:`1635741`.) 
+ Deprecated ---------- diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h new file mode 100644 index 0000000..380b941 --- /dev/null +++ b/Include/internal/pycore_ucnhash.h @@ -0,0 +1,44 @@ +/* Unicode name database interface */ +#ifndef Py_INTERNAL_UCNHASH_H +#define Py_INTERNAL_UCNHASH_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* revised ucnhash CAPI interface (exported through a "wrapper") */ + +#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI" + +typedef struct { + + /* Size of this struct */ + int size; + + // state which must be passed as the first parameter to getname() + // and getcode() + void *state; + + /* Get name for a given character code. Returns non-zero if + success, zero if not. Does not set Python exceptions. + If self is NULL, data come from the default version of the database. + If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ + int (*getname)(void *state, PyObject *self, Py_UCS4 code, + char* buffer, int buflen, + int with_alias_and_seq); + + /* Get character code for a given name. Same error handling + as for getname. */ + int (*getcode)(void *state, PyObject *self, + const char* name, int namelen, Py_UCS4* code, + int with_named_seq); + +} _PyUnicode_Name_CAPI; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_UCNHASH_H */ diff --git a/Include/ucnhash.h b/Include/ucnhash.h deleted file mode 100644 index 45362e9..0000000 --- a/Include/ucnhash.h +++ /dev/null @@ -1,36 +0,0 @@ -/* Unicode name database interface */ -#ifndef Py_LIMITED_API -#ifndef Py_UCNHASH_H -#define Py_UCNHASH_H -#ifdef __cplusplus -extern "C" { -#endif - -/* revised ucnhash CAPI interface (exported through a "wrapper") */ - -#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI" - -typedef struct { - - /* Size of this struct */ - int size; - - /* Get name for a given character code. 
Returns non-zero if - success, zero if not. Does not set Python exceptions. - If self is NULL, data come from the default version of the database. - If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ - int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen, - int with_alias_and_seq); - - /* Get character code for a given name. Same error handling - as for getname. */ - int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code, - int with_named_seq); - -} _PyUnicode_Name_CAPI; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_UCNHASH_H */ -#endif /* !Py_LIMITED_API */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 921bd08..fe226ce 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/traceback.h \ $(srcdir)/Include/tracemalloc.h \ $(srcdir)/Include/tupleobject.h \ - $(srcdir)/Include/ucnhash.h \ $(srcdir)/Include/unicodeobject.h \ $(srcdir)/Include/warnings.h \ $(srcdir)/Include/weakrefobject.h \ @@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_sysmodule.h \ $(srcdir)/Include/internal/pycore_traceback.h \ $(srcdir)/Include/internal/pycore_tuple.h \ + $(srcdir)/Include/internal/pycore_ucnhash.h \ $(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_warnings.h \ $(DTRACE_HEADERS) diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst new file mode 100644 index 0000000..5272ad5 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst @@ -0,0 +1,4 @@ +The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API +``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the +structure gets a new ``state`` member which must be passed to the +``getcode()`` and ``getname()`` functions. Patch by Victor Stinner. 
diff --git a/Modules/Setup b/Modules/Setup index 87f3a7c..6f9bb81 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -185,7 +185,7 @@ _symtable symtablemodule.c #_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups #_statistics _statisticsmodule.c # statistics accelerator -#unicodedata unicodedata.c # static Unicode character database +#unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN # static Unicode character database # Modules with some UNIX dependencies -- on by default: diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 941fd2f..bfd8ab5 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -16,7 +16,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" -#include "ucnhash.h" +#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI #include "structmember.h" // PyMemberDef #include <stdbool.h> @@ -97,6 +97,8 @@ typedef struct { // Borrowed reference to &UCD_Type. It is used to prepare the code // to convert the UCD_Type static type to a heap type. PyTypeObject *ucd_type; + + _PyUnicode_Name_CAPI capi; } unicodedata_module_state; // bpo-1635741: Temporary global state until the unicodedata module @@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self, } static int -capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen, +capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code, + char* buffer, int buflen, int with_alias_and_seq) { - unicodedata_module_state *state = &global_module_state; + unicodedata_module_state *state = (unicodedata_module_state *)state_raw; return _getucname(state, self, code, buffer, buflen, with_alias_and_seq); } @@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self, } static int -capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code, +capi_getcode(void *state_raw, PyObject* self, + const char* name, int namelen, Py_UCS4* code, int with_named_seq) { - unicodedata_module_state *state = &global_module_state; +
unicodedata_module_state *state = (unicodedata_module_state *)state_raw; return _getcode(state, self, name, namelen, code, with_named_seq); } -static const _PyUnicode_Name_CAPI hashAPI = -{ - sizeof(_PyUnicode_Name_CAPI), - capi_getucname, - capi_getcode -}; - /* -------------------------------------------------------------------- */ /* Python bindings */ @@ -1510,6 +1507,11 @@ PyInit_unicodedata(void) PyObject *m, *v; unicodedata_module_state *state = &global_module_state; + state->capi.size = sizeof(_PyUnicode_Name_CAPI); + state->capi.state = state; + state->capi.getname = capi_getucname; + state->capi.getcode = capi_getcode; + Py_SET_TYPE(&UCD_Type, &PyType_Type); state->ucd_type = &UCD_Type; @@ -1528,7 +1530,7 @@ PyInit_unicodedata(void) PyModule_AddObject(m, "ucd_3_2_0", v); /* Export C API */ - v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL); + v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL); if (v != NULL) PyModule_AddObject(m, "ucnhash_CAPI", v); return m; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f963deb..ba48d35 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
#define PY_SSIZE_T_CLEAN #include "Python.h" -#include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" // _Py_bytes_lower() -#include "pycore_initconfig.h" // _PyStatus_OK() -#include "pycore_interp.h" // PyInterpreterState.fs_codec -#include "pycore_object.h" // _PyObject_GC_TRACK() -#include "pycore_pathconfig.h" // _Py_DumpPathConfig() -#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() -#include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "ucnhash.h" // _PyUnicode_Name_CAPI -#include "stringlib/eq.h" // unicode_eq() +#include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_bytes_methods.h" // _Py_bytes_lower() +#include "pycore_initconfig.h" // _PyStatus_OK() +#include "pycore_interp.h" // PyInterpreterState.fs_codec +#include "pycore_object.h" // _PyObject_GC_TRACK() +#include "pycore_pathconfig.h" // _Py_DumpPathConfig() +#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() +#include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI +#include "stringlib/eq.h" // unicode_eq() #ifdef MS_WINDOWS #include <windows.h> @@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode) /* --- Unicode Escape Codec ----------------------------------------------- */ -static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; +static _PyUnicode_Name_CAPI *ucnhash_capi = NULL; PyObject * _PyUnicode_DecodeUnicodeEscape(const char *s, @@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s, /* \N{name} */ case 'N': - if (ucnhash_CAPI == NULL) { + if (ucnhash_capi == NULL) { /* load the unicode data module */ - ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( + ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( PyUnicodeData_CAPSULE_NAME, 1); - if (ucnhash_CAPI == NULL) { + if (ucnhash_capi == NULL) { PyErr_SetString( PyExc_UnicodeError, "\\N escapes not supported (can't load unicodedata module)" @@ -6523,7 +6523,8 @@
_PyUnicode_DecodeUnicodeEscape(const char *s, s++; ch = 0xffffffff; /* in case 'getcode' messes up */ if (namelen <= INT_MAX && - ucnhash_CAPI->getcode(NULL, start, (int)namelen, + ucnhash_capi->getcode(ucnhash_capi->state, NULL, + start, (int)namelen, &ch, 0)) { assert(ch <= MAX_UNICODE); WRITE_CHAR(ch); diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 266a193..600f33b 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -196,6 +196,7 @@ + @@ -252,7 +253,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 22d9b79..75b91d8 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -273,9 +273,6 @@ Include - - Include - Include @@ -573,6 +570,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/Python/codecs.c b/Python/codecs.c index ade1418..62d1f3f 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives. 
#include "Python.h" #include "pycore_interp.h" // PyInterpreterState.codec_search_path #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "ucnhash.h" +#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI #include <ctype.h> const char *Py_hexdigits = "0123456789abcdef"; @@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } -static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; +static _PyUnicode_Name_CAPI *ucnhash_capi = NULL; PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { @@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; - if (!ucnhash_CAPI) { + if (!ucnhash_capi) { /* load the unicode data module */ - ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( + ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( PyUnicodeData_CAPSULE_NAME, 1); - if (!ucnhash_CAPI) + if (!ucnhash_capi) { return NULL; + } } for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */ c = PyUnicode_READ_CHAR(object, i); - if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) { + if (ucnhash_capi->getname(ucnhash_capi->state, NULL, + c, buffer, sizeof(buffer), 1)) { replsize = 1+1+1+(int)strlen(buffer)+1; } else if (c >= 0x10000) { @@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) i < end; ++i) { c = PyUnicode_READ_CHAR(object, i); *outp++ = '\\'; - if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) { + if (ucnhash_capi->getname(ucnhash_capi->state, NULL, + c, buffer, sizeof(buffer), 1)) { *outp++ = 'N'; *outp++ = '{'; strcpy((char *)outp, buffer); diff --git a/setup.py b/setup.py index d3fd7bc..8a4abe5 100644 --- a/setup.py +++ b/setup.py @@ -878,7 +878,8 @@ class PyBuildExt(build_ext): self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c'])) # static Unicode character database self.add(Extension('unicodedata', ['unicodedata.c'], -
depends=['unicodedata_db.h', 'unicodename_db.h'])) + depends=['unicodedata_db.h', 'unicodename_db.h'], + extra_compile_args=['-DPy_BUILD_CORE_MODULE'])) # _opcode module self.add(Extension('_opcode', ['_opcode.c'])) # asyncio speedups -- cgit v0.12