summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/whatsnew/3.10.rst 6
-rw-r--r-- Include/internal/pycore_ucnhash.h (renamed from Include/ucnhash.h) 22
-rw-r--r-- Makefile.pre.in 2
-rw-r--r-- Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst 4
-rw-r--r-- Modules/Setup 2
-rw-r--r-- Modules/unicodedata.c 28
-rw-r--r-- Objects/unicodeobject.c 31
-rw-r--r-- PCbuild/pythoncore.vcxproj 2
-rw-r--r-- PCbuild/pythoncore.vcxproj.filters 6
-rw-r--r-- Python/codecs.c 17
-rw-r--r-- setup.py 3
11 files changed, 74 insertions, 49 deletions
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 6206c94..581d3a5 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -407,6 +407,12 @@ Porting to Python 3.10
Unicode object without initial data.
(Contributed by Inada Naoki in :issue:`36346`.)
+* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
+ ``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
+ the structure gets a new ``state`` member which must be passed to the
+ ``getcode()`` and ``getname()`` functions.
+ (Contributed by Victor Stinner in :issue:`1635741`.)
+
Deprecated
----------
diff --git a/Include/ucnhash.h b/Include/internal/pycore_ucnhash.h
index 45362e9..380b941 100644
--- a/Include/ucnhash.h
+++ b/Include/internal/pycore_ucnhash.h
@@ -1,11 +1,14 @@
/* Unicode name database interface */
-#ifndef Py_LIMITED_API
-#ifndef Py_UCNHASH_H
-#define Py_UCNHASH_H
+#ifndef Py_INTERNAL_UCNHASH_H
+#define Py_INTERNAL_UCNHASH_H
#ifdef __cplusplus
extern "C" {
#endif
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
/* revised ucnhash CAPI interface (exported through a "wrapper") */
#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
@@ -15,16 +18,22 @@ typedef struct {
/* Size of this struct */
int size;
+ // state which must be passed as the first parameter to getname()
+ // and getcode()
+ void *state;
+
/* Get name for a given character code. Returns non-zero if
success, zero if not. Does not set Python exceptions.
If self is NULL, data come from the default version of the database.
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
- int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
+ int (*getname)(void *state, PyObject *self, Py_UCS4 code,
+ char* buffer, int buflen,
int with_alias_and_seq);
/* Get character code for a given name. Same error handling
as for getname. */
- int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code,
+ int (*getcode)(void *state, PyObject *self,
+ const char* name, int namelen, Py_UCS4* code,
int with_named_seq);
} _PyUnicode_Name_CAPI;
@@ -32,5 +41,4 @@ typedef struct {
#ifdef __cplusplus
}
#endif
-#endif /* !Py_UCNHASH_H */
-#endif /* !Py_LIMITED_API */
+#endif /* !Py_INTERNAL_UCNHASH_H */
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 921bd08..fe226ce 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/traceback.h \
$(srcdir)/Include/tracemalloc.h \
$(srcdir)/Include/tupleobject.h \
- $(srcdir)/Include/ucnhash.h \
$(srcdir)/Include/unicodeobject.h \
$(srcdir)/Include/warnings.h \
$(srcdir)/Include/weakrefobject.h \
@@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_sysmodule.h \
$(srcdir)/Include/internal/pycore_traceback.h \
$(srcdir)/Include/internal/pycore_tuple.h \
+ $(srcdir)/Include/internal/pycore_ucnhash.h \
$(srcdir)/Include/internal/pycore_unionobject.h \
$(srcdir)/Include/internal/pycore_warnings.h \
$(DTRACE_HEADERS)
diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst
new file mode 100644
index 0000000..5272ad5
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst
@@ -0,0 +1,4 @@
+The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
+``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
+structure gets a new ``state`` member which must be passed to the
+``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.
diff --git a/Modules/Setup b/Modules/Setup
index 87f3a7c..6f9bb81 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -185,7 +185,7 @@ _symtable symtablemodule.c
#_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups
#_statistics _statisticsmodule.c # statistics accelerator
-#unicodedata unicodedata.c # static Unicode character database
+#unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN # static Unicode character database
# Modules with some UNIX dependencies -- on by default:
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 941fd2f..bfd8ab5 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -16,7 +16,7 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
-#include "ucnhash.h"
+#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "structmember.h" // PyMemberDef
#include <stdbool.h>
@@ -97,6 +97,8 @@ typedef struct {
// Borrowed reference to &UCD_Type. It is used to prepare the code
// to convert the UCD_Type static type to a heap type.
PyTypeObject *ucd_type;
+
+ _PyUnicode_Name_CAPI capi;
} unicodedata_module_state;
// bpo-1635741: Temporary global state until the unicodedata module
@@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self,
}
static int
-capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
+capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
+ char* buffer, int buflen,
int with_alias_and_seq)
{
- unicodedata_module_state *state = &global_module_state;
+ unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
}
@@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self,
}
static int
-capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
+capi_getcode(void *state_raw, PyObject* self,
+ const char* name, int namelen, Py_UCS4* code,
int with_named_seq)
{
- unicodedata_module_state *state = &global_module_state;
+ unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
return _getcode(state, self, name, namelen, code, with_named_seq);
}
-static const _PyUnicode_Name_CAPI hashAPI =
-{
- sizeof(_PyUnicode_Name_CAPI),
- capi_getucname,
- capi_getcode
-};
-
/* -------------------------------------------------------------------- */
/* Python bindings */
@@ -1510,6 +1507,11 @@ PyInit_unicodedata(void)
PyObject *m, *v;
unicodedata_module_state *state = &global_module_state;
+ state->capi.size = sizeof(_PyUnicode_Name_CAPI);
+ state->capi.state = state;
+ state->capi.getname = capi_getucname;
+ state->capi.getcode = capi_getcode;
+
Py_SET_TYPE(&UCD_Type, &PyType_Type);
state->ucd_type = &UCD_Type;
@@ -1528,7 +1530,7 @@ PyInit_unicodedata(void)
PyModule_AddObject(m, "ucd_3_2_0", v);
/* Export C API */
- v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
+ v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
if (v != NULL)
PyModule_AddObject(m, "ucnhash_CAPI", v);
return m;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f963deb..ba48d35 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define PY_SSIZE_T_CLEAN
#include "Python.h"
-#include "pycore_abstract.h" // _PyIndex_Check()
-#include "pycore_bytes_methods.h" // _Py_bytes_lower()
-#include "pycore_initconfig.h" // _PyStatus_OK()
-#include "pycore_interp.h" // PyInterpreterState.fs_codec
-#include "pycore_object.h" // _PyObject_GC_TRACK()
-#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
-#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
-#include "pycore_pystate.h" // _PyInterpreterState_GET()
-#include "ucnhash.h" // _PyUnicode_Name_CAPI
-#include "stringlib/eq.h" // unicode_eq()
+#include "pycore_abstract.h" // _PyIndex_Check()
+#include "pycore_bytes_methods.h" // _Py_bytes_lower()
+#include "pycore_initconfig.h" // _PyStatus_OK()
+#include "pycore_interp.h" // PyInterpreterState.fs_codec
+#include "pycore_object.h" // _PyObject_GC_TRACK()
+#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
+#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
+#include "pycore_pystate.h" // _PyInterpreterState_GET()
+#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
+#include "stringlib/eq.h" // unicode_eq()
#ifdef MS_WINDOWS
#include <windows.h>
@@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode)
/* --- Unicode Escape Codec ----------------------------------------------- */
-static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
PyObject *
_PyUnicode_DecodeUnicodeEscape(const char *s,
@@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
/* \N{name} */
case 'N':
- if (ucnhash_CAPI == NULL) {
+ if (ucnhash_capi == NULL) {
/* load the unicode data module */
- ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
+ ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
- if (ucnhash_CAPI == NULL) {
+ if (ucnhash_capi == NULL) {
PyErr_SetString(
PyExc_UnicodeError,
"\\N escapes not supported (can't load unicodedata module)"
@@ -6523,7 +6523,8 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
s++;
ch = 0xffffffff; /* in case 'getcode' messes up */
if (namelen <= INT_MAX &&
- ucnhash_CAPI->getcode(NULL, start, (int)namelen,
+ ucnhash_capi->getcode(ucnhash_capi->state, NULL,
+ start, (int)namelen,
&ch, 0)) {
assert(ch <= MAX_UNICODE);
WRITE_CHAR(ch);
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 266a193..600f33b 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -196,6 +196,7 @@
<ClInclude Include="..\Include\internal\pycore_sysmodule.h" />
<ClInclude Include="..\Include\internal\pycore_traceback.h" />
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
+ <ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
<ClInclude Include="..\Include\internal\pycore_unionobject.h" />
<ClInclude Include="..\Include\internal\pycore_warnings.h" />
<ClInclude Include="..\Include\interpreteridobject.h" />
@@ -252,7 +253,6 @@
<ClInclude Include="..\Include\traceback.h" />
<ClInclude Include="..\Include\tracemalloc.h" />
<ClInclude Include="..\Include\tupleobject.h" />
- <ClInclude Include="..\Include\ucnhash.h" />
<ClInclude Include="..\Include\unicodeobject.h" />
<ClInclude Include="..\Include\weakrefobject.h" />
<ClInclude Include="..\Modules\_math.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 22d9b79..75b91d8 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -273,9 +273,6 @@
<ClInclude Include="..\Include\tupleobject.h">
<Filter>Include</Filter>
</ClInclude>
- <ClInclude Include="..\Include\ucnhash.h">
- <Filter>Include</Filter>
- </ClInclude>
<ClInclude Include="..\Include\unicodeobject.h">
<Filter>Include</Filter>
</ClInclude>
@@ -573,6 +570,9 @@
<ClInclude Include="..\Include\internal\pycore_tuple.h">
<Filter>Include\internal</Filter>
</ClInclude>
+ <ClInclude Include="..\Include\internal\pycore_ucnhash.h">
+ <Filter>Include\internal</Filter>
+ </ClInclude>
<ClInclude Include="..\Include\internal\pycore_unionobject.h">
<Filter>Include\internal</Filter>
</ClInclude>
diff --git a/Python/codecs.c b/Python/codecs.c
index ade1418..62d1f3f 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
#include "Python.h"
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
#include "pycore_pystate.h" // _PyInterpreterState_GET()
-#include "ucnhash.h"
+#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include <ctype.h>
const char *Py_hexdigits = "0123456789abcdef";
@@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
return Py_BuildValue("(Nn)", res, end);
}
-static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
{
@@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- if (!ucnhash_CAPI) {
+ if (!ucnhash_capi) {
/* load the unicode data module */
- ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
+ ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
- if (!ucnhash_CAPI)
+ if (!ucnhash_capi) {
return NULL;
+ }
}
for (i = start, ressize = 0; i < end; ++i) {
/* object is guaranteed to be "ready" */
c = PyUnicode_READ_CHAR(object, i);
- if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
+ if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
+ c, buffer, sizeof(buffer), 1)) {
replsize = 1+1+1+(int)strlen(buffer)+1;
}
else if (c >= 0x10000) {
@@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
i < end; ++i) {
c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\';
- if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
+ if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
+ c, buffer, sizeof(buffer), 1)) {
*outp++ = 'N';
*outp++ = '{';
strcpy((char *)outp, buffer);
diff --git a/setup.py b/setup.py
index d3fd7bc..8a4abe5 100644
--- a/setup.py
+++ b/setup.py
@@ -878,7 +878,8 @@ class PyBuildExt(build_ext):
self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']))
# static Unicode character database
self.add(Extension('unicodedata', ['unicodedata.c'],
- depends=['unicodedata_db.h', 'unicodename_db.h']))
+ depends=['unicodedata_db.h', 'unicodename_db.h'],
+ extra_compile_args=['-DPy_BUILD_CORE_MODULE']))
# _opcode module
self.add(Extension('_opcode', ['_opcode.c']))
# asyncio speedups