diff options
author | Victor Stinner <vstinner@python.org> | 2020-10-26 18:19:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-26 18:19:36 (GMT) |
commit | 920cb647ba23feab7987d0dac1bd63bfc2ffc4c0 (patch) | |
tree | 9ce3f058110f5fef8469d7a4421adc7d66fe85a8 | |
parent | 8374d2ee1589791be8892b00f4bbf8121dde24bd (diff) | |
download | cpython-920cb647ba23feab7987d0dac1bd63bfc2ffc4c0.zip cpython-920cb647ba23feab7987d0dac1bd63bfc2ffc4c0.tar.gz cpython-920cb647ba23feab7987d0dac1bd63bfc2ffc4c0.tar.bz2 |
bpo-42157: unicodedata avoids references to UCD_Type (GH-22990)
* UCD_Check() uses PyModule_Check()
* Simplify the internal _PyUnicode_Name_CAPI structure:
  * Remove size and state members
  * Remove state and self parameters of getcode() and getname()
    functions
* Remove global_module_state
-rw-r--r-- | Doc/whatsnew/3.10.rst | 6 | ||||
-rw-r--r-- | Include/internal/pycore_ucnhash.h | 24 | ||||
-rw-r--r-- | Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst | 4 | ||||
-rw-r--r-- | Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst | 3 | ||||
-rw-r--r-- | Modules/unicodedata.c | 216 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 3 | ||||
-rw-r--r-- | Python/codecs.c | 6 |
7 files changed, 126 insertions, 136 deletions
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 581d3a5..2ef2b5d 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -408,10 +408,8 @@ Porting to Python 3.10 (Contributed by Inada Naoki in :issue:`36346`.) * The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API - ``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, - the structure gets a new ``state`` member which must be passed to the - ``getcode()`` and ``getname()`` functions. - (Contributed by Victor Stinner in :issue:`1635741`.) + ``unicodedata.ucnhash_CAPI`` moves to the internal C API. + (Contributed by Victor Stinner in :issue:`42157`.) Deprecated ---------- diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h index 380b941..5e7c035 100644 --- a/Include/internal/pycore_ucnhash.h +++ b/Include/internal/pycore_ucnhash.h @@ -15,25 +15,15 @@ extern "C" { typedef struct { - /* Size of this struct */ - int size; - - // state which must be passed as the first parameter to getname() - // and getcode() - void *state; - - /* Get name for a given character code. Returns non-zero if - success, zero if not. Does not set Python exceptions. - If self is NULL, data come from the default version of the database. - If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ - int (*getname)(void *state, PyObject *self, Py_UCS4 code, - char* buffer, int buflen, + /* Get name for a given character code. + Returns non-zero if success, zero if not. + Does not set Python exceptions. */ + int (*getname)(Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq); - /* Get character code for a given name. Same error handling - as for getname. */ - int (*getcode)(void *state, PyObject *self, - const char* name, int namelen, Py_UCS4* code, + /* Get character code for a given name. + Same error handling as for getname(). 
*/ + int (*getcode)(const char* name, int namelen, Py_UCS4* code, int with_named_seq); } _PyUnicode_Name_CAPI; diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst deleted file mode 100644 index 5272ad5..0000000 --- a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst +++ /dev/null @@ -1,4 +0,0 @@ -The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API -``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the -structure gets a new ``state`` member which must be passed to the -``getcode()`` and ``getname()`` functions. Patch by Victor Stinner. diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst new file mode 100644 index 0000000..1f05186 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst @@ -0,0 +1,3 @@ +The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API +``unicodedata.ucnhash_CAPI`` moves to the internal C API. +Patch by Victor Stinner. diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index bfd8ab5..6c802ba 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -93,29 +93,19 @@ static PyMemberDef DB_members[] = { /* forward declaration */ static PyTypeObject UCD_Type; -typedef struct { - // Borrowed reference to &UCD_Type. It is used to prepare the code - // to convert the UCD_Type static type to a heap type. - PyTypeObject *ucd_type; - - _PyUnicode_Name_CAPI capi; -} unicodedata_module_state; - -// bpo-1635741: Temporary global state until the unicodedata module -// gets a real module state. -static unicodedata_module_state global_module_state; - -// Check if self is an instance of ucd_type. -// Return 0 if self is NULL (when the PyCapsule C API is used). 
-#define UCD_Check(self, ucd_type) (self != NULL && Py_IS_TYPE(self, ucd_type)) +// Check if self is an unicodedata.UCD instance. +// If self is NULL (when the PyCapsule C API is used), return 0. +// PyModule_Check() is used to avoid having to retrieve the ucd_type. +// See unicodedata_functions comment to the rationale of this macro. +#define UCD_Check(self) (self != NULL && !PyModule_Check(self)) static PyObject* -new_previous_version(unicodedata_module_state *state, +new_previous_version(PyTypeObject *ucd_type, const char*name, const change_record* (*getrecord)(Py_UCS4), Py_UCS4 (*normalization)(Py_UCS4)) { PreviousDBVersion *self; - self = PyObject_New(PreviousDBVersion, state->ucd_type); + self = PyObject_New(PreviousDBVersion, ucd_type); if (self == NULL) return NULL; self->name = name; @@ -147,12 +137,11 @@ unicodedata_UCD_decimal_impl(PyObject *self, int chr, PyObject *default_value) /*[clinic end generated code: output=be23376e1a185231 input=933f8107993f23d0]*/ { - unicodedata_module_state *state = &global_module_state; int have_old = 0; long rc; Py_UCS4 c = (Py_UCS4)chr; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) { /* unassigned */ @@ -236,12 +225,11 @@ unicodedata_UCD_numeric_impl(PyObject *self, int chr, PyObject *default_value) /*[clinic end generated code: output=53ce281fe85b10c4 input=fdf5871a5542893c]*/ { - unicodedata_module_state *state = &global_module_state; int have_old = 0; double rc; Py_UCS4 c = (Py_UCS4)chr; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) { /* unassigned */ @@ -283,11 +271,10 @@ static PyObject * unicodedata_UCD_category_impl(PyObject *self, int chr) /*[clinic end generated code: output=8571539ee2e6783a input=27d6f3d85050bc06]*/ { - unicodedata_module_state *state = &global_module_state; int index; Py_UCS4 c = 
(Py_UCS4)chr; index = (int) _getrecord_ex(c)->category; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed != 0xFF) index = old->category_changed; @@ -311,11 +298,10 @@ static PyObject * unicodedata_UCD_bidirectional_impl(PyObject *self, int chr) /*[clinic end generated code: output=d36310ce2039bb92 input=b3d8f42cebfcf475]*/ { - unicodedata_module_state *state = &global_module_state; int index; Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->bidirectional; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) index = 0; /* unassigned */ @@ -341,11 +327,10 @@ static int unicodedata_UCD_combining_impl(PyObject *self, int chr) /*[clinic end generated code: output=cad056d0cb6a5920 input=9f2d6b2a95d0a22a]*/ { - unicodedata_module_state *state = &global_module_state; int index; Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->combining; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) index = 0; /* unassigned */ @@ -370,11 +355,10 @@ static int unicodedata_UCD_mirrored_impl(PyObject *self, int chr) /*[clinic end generated code: output=2532dbf8121b50e6 input=5dd400d351ae6f3b]*/ { - unicodedata_module_state *state = &global_module_state; int index; Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->mirrored; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) index = 0; /* unassigned */ @@ -398,11 +382,10 @@ static PyObject * unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr) /*[clinic end generated code: output=484e8537d9ee8197 input=c4854798aab026e0]*/ { - unicodedata_module_state *state = &global_module_state; int index; Py_UCS4 c = (Py_UCS4)chr; 
index = (int) _getrecord_ex(c)->east_asian_width; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) index = 0; /* unassigned */ @@ -428,7 +411,6 @@ static PyObject * unicodedata_UCD_decomposition_impl(PyObject *self, int chr) /*[clinic end generated code: output=7d699f3ec7565d27 input=e4c12459ad68507b]*/ { - unicodedata_module_state *state = &global_module_state; char decomp[256]; int code, index, count; size_t i; @@ -437,7 +419,7 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr) code = (int)c; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) return PyUnicode_FromString(""); /* unassigned */ @@ -480,13 +462,14 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr) } static void -get_decomp_record(unicodedata_module_state *state, PyObject *self, - Py_UCS4 code, int *index, int *prefix, int *count) +get_decomp_record(PyObject *self, Py_UCS4 code, + int *index, int *prefix, int *count) { if (code >= 0x110000) { *index = 0; - } else if (UCD_Check(self, state->ucd_type) && - get_old_record(self, code)->category_changed==0) { + } + else if (UCD_Check(self) + && get_old_record(self, code)->category_changed==0) { /* unassigned in old version */ *index = 0; } @@ -515,8 +498,7 @@ get_decomp_record(unicodedata_module_state *state, PyObject *self, #define SCount (LCount*NCount) static PyObject* -nfd_nfkd(unicodedata_module_state *state, PyObject *self, - PyObject *input, int k) +nfd_nfkd(PyObject *self, PyObject *input, int k) { PyObject *result; Py_UCS4 *output; @@ -584,7 +566,7 @@ nfd_nfkd(unicodedata_module_state *state, PyObject *self, continue; } /* normalization changes */ - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code); if (value != 0) { stack[stackptr++] = value; @@ 
-593,7 +575,7 @@ nfd_nfkd(unicodedata_module_state *state, PyObject *self, } /* Other decompositions. */ - get_decomp_record(state, self, code, &index, &prefix, &count); + get_decomp_record(self, code, &index, &prefix, &count); /* Copy character if it is not decomposable, or has a compatibility decomposition, but we do NFD. */ @@ -665,7 +647,7 @@ find_nfc_index(const struct reindex* nfc, Py_UCS4 code) } static PyObject* -nfc_nfkc(unicodedata_module_state *state, PyObject *self, PyObject *input, int k) +nfc_nfkc(PyObject *self, PyObject *input, int k) { PyObject *result; int kind; @@ -677,7 +659,7 @@ nfc_nfkc(unicodedata_module_state *state, PyObject *self, PyObject *input, int k Py_ssize_t skipped[20]; int cskipped = 0; - result = nfd_nfkd(state, self, input, k); + result = nfd_nfkd(self, input, k); if (!result) return NULL; /* result will be "ready". */ @@ -820,13 +802,13 @@ typedef enum {YES = 0, MAYBE = 1, NO = 2} QuickcheckResult; * https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms */ static QuickcheckResult -is_normalized_quickcheck(unicodedata_module_state *state, PyObject *self, - PyObject *input, bool nfc, bool k, bool yes_only) +is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k, + bool yes_only) { - /* An older version of the database is requested, quickchecks must be - disabled. */ - if (UCD_Check(self, state->ucd_type)) + /* UCD 3.2.0 is requested, quickchecks must be disabled. 
*/ + if (UCD_Check(self)) { return NO; + } Py_ssize_t i, len; int kind; @@ -885,7 +867,6 @@ unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form, PyObject *input) /*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/ { - unicodedata_module_state *state = &global_module_state; if (PyUnicode_READY(input) == -1) { return NULL; } @@ -921,10 +902,10 @@ unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form, return NULL; } - m = is_normalized_quickcheck(state, self, input, nfc, k, false); + m = is_normalized_quickcheck(self, input, nfc, k, false); if (m == MAYBE) { - cmp = (nfc ? nfc_nfkc : nfd_nfkd)(state, self, input, k); + cmp = (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k); if (cmp == NULL) { return NULL; } @@ -959,7 +940,6 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form, PyObject *input) /*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/ { - unicodedata_module_state *state = &global_module_state; if (PyUnicode_GET_LENGTH(input) == 0) { /* Special case empty input strings, since resizing them later would cause internal errors. 
*/ @@ -968,36 +948,36 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form, } if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) { - if (is_normalized_quickcheck(state, self, input, + if (is_normalized_quickcheck(self, input, true, false, true) == YES) { Py_INCREF(input); return input; } - return nfc_nfkc(state, self, input, 0); + return nfc_nfkc(self, input, 0); } if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) { - if (is_normalized_quickcheck(state, self, input, + if (is_normalized_quickcheck(self, input, true, true, true) == YES) { Py_INCREF(input); return input; } - return nfc_nfkc(state, self, input, 1); + return nfc_nfkc(self, input, 1); } if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) { - if (is_normalized_quickcheck(state, self, input, + if (is_normalized_quickcheck(self, input, false, false, true) == YES) { Py_INCREF(input); return input; } - return nfd_nfkd(state, self, input, 0); + return nfd_nfkd(self, input, 0); } if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) { - if (is_normalized_quickcheck(state, self, input, + if (is_normalized_quickcheck(self, input, false, true, true) == YES) { Py_INCREF(input); return input; } - return nfd_nfkd(state, self, input, 1); + return nfd_nfkd(self, input, 1); } PyErr_SetString(PyExc_ValueError, "invalid normalization form"); return NULL; @@ -1080,7 +1060,7 @@ is_unified_ideograph(Py_UCS4 code) (cp < named_sequences_end)) static int -_getucname(unicodedata_module_state *state, PyObject *self, +_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq) { /* Find the name associated with the given code point. 
@@ -1098,7 +1078,7 @@ _getucname(unicodedata_module_state *state, PyObject *self, if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code))) return 0; - if (UCD_Check(self, state->ucd_type)) { + if (UCD_Check(self)) { /* in 3.2.0 there are no aliases and named sequences */ const change_record *old; if (IS_ALIAS(code) || IS_NAMED_SEQ(code)) @@ -1182,23 +1162,21 @@ _getucname(unicodedata_module_state *state, PyObject *self, } static int -capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code, +capi_getucname(Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq) { - unicodedata_module_state *state = (unicodedata_module_state *)state_raw; - return _getucname(state, self, code, buffer, buflen, with_alias_and_seq); + return _getucname(NULL, code, buffer, buflen, with_alias_and_seq); } static int -_cmpname(unicodedata_module_state *state, PyObject *self, - int code, const char* name, int namelen) +_cmpname(PyObject *self, int code, const char* name, int namelen) { /* check if code corresponds to the given name */ int i; char buffer[NAME_MAXLEN+1]; - if (!_getucname(state, self, code, buffer, NAME_MAXLEN, 1)) + if (!_getucname(self, code, buffer, NAME_MAXLEN, 1)) return 0; for (i = 0; i < namelen; i++) { if (Py_TOUPPER(name[i]) != buffer[i]) @@ -1243,7 +1221,7 @@ _check_alias_and_seq(unsigned int cp, Py_UCS4* code, int with_named_seq) } static int -_getcode(unicodedata_module_state *state, PyObject* self, +_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code, int with_named_seq) { /* Return the code point associated with the given name. 
@@ -1305,7 +1283,7 @@ _getcode(unicodedata_module_state *state, PyObject* self, v = code_hash[i]; if (!v) return 0; - if (_cmpname(state, self, v, name, namelen)) { + if (_cmpname(self, v, name, namelen)) { return _check_alias_and_seq(v, code, with_named_seq); } incr = (h ^ (h >> 3)) & mask; @@ -1316,7 +1294,7 @@ _getcode(unicodedata_module_state *state, PyObject* self, v = code_hash[i]; if (!v) return 0; - if (_cmpname(state, self, v, name, namelen)) { + if (_cmpname(self, v, name, namelen)) { return _check_alias_and_seq(v, code, with_named_seq); } incr = incr << 1; @@ -1326,15 +1304,20 @@ _getcode(unicodedata_module_state *state, PyObject* self, } static int -capi_getcode(void *state_raw, PyObject* self, - const char* name, int namelen, Py_UCS4* code, +capi_getcode(const char* name, int namelen, Py_UCS4* code, int with_named_seq) { - unicodedata_module_state *state = (unicodedata_module_state *)state_raw; - return _getcode(state, self, name, namelen, code, with_named_seq); + return _getcode(NULL, name, namelen, code, with_named_seq); } +static const _PyUnicode_Name_CAPI unicodedata_capi = +{ + .getname = capi_getucname, + .getcode = capi_getcode, +}; + + /* -------------------------------------------------------------------- */ /* Python bindings */ @@ -1356,11 +1339,10 @@ static PyObject * unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value) /*[clinic end generated code: output=6bbb37a326407707 input=3e0367f534de56d9]*/ { - unicodedata_module_state *state = &global_module_state; char name[NAME_MAXLEN+1]; Py_UCS4 c = (Py_UCS4)chr; - if (!_getucname(state, self, c, name, NAME_MAXLEN, 0)) { + if (!_getucname(self, c, name, NAME_MAXLEN, 0)) { if (default_value == NULL) { PyErr_SetString(PyExc_ValueError, "no such name"); return NULL; @@ -1392,7 +1374,6 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name, Py_ssize_clean_t name_length) /*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/ { - 
unicodedata_module_state *state = &global_module_state; Py_UCS4 code; unsigned int index; if (name_length > NAME_MAXLEN) { @@ -1400,7 +1381,7 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name, return NULL; } - if (!_getcode(state, self, name, (int)name_length, &code, 1)) { + if (!_getcode(self, name, (int)name_length, &code, 1)) { PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name); return NULL; } @@ -1415,8 +1396,10 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name, return PyUnicode_FromOrdinal(code); } -/* XXX Add doc strings. */ - +// List of functions used to define module functions *AND* unicodedata.UCD +// methods. For module functions, self is the module. For UCD methods, self +// is an UCD instance. The UCD_Check() macro is used to check if self is +// an UCD instance. static PyMethodDef unicodedata_functions[] = { UNICODEDATA_UCD_DECIMAL_METHODDEF UNICODEDATA_UCD_DIGIT_METHODDEF @@ -1501,41 +1484,64 @@ static struct PyModuleDef unicodedatamodule = { NULL }; -PyMODINIT_FUNC -PyInit_unicodedata(void) -{ - PyObject *m, *v; - unicodedata_module_state *state = &global_module_state; - - state->capi.size = sizeof(_PyUnicode_Name_CAPI); - state->capi.state = state; - state->capi.getname = capi_getucname; - state->capi.getcode = capi_getcode; +static int +unicodedata_exec(PyObject *module) +{ Py_SET_TYPE(&UCD_Type, &PyType_Type); - state->ucd_type = &UCD_Type; + PyTypeObject *ucd_type = &UCD_Type; - m = PyModule_Create(&unicodedatamodule); - if (!m) - return NULL; + if (PyModule_AddStringConstant(module, "unidata_version", UNIDATA_VERSION) < 0) { + return -1; + } - PyModule_AddStringConstant(m, "unidata_version", UNIDATA_VERSION); - Py_INCREF(state->ucd_type); - PyModule_AddObject(m, "UCD", (PyObject*)state->ucd_type); + if (PyModule_AddType(module, ucd_type) < 0) { + return -1; + } /* Previous versions */ - v = new_previous_version(state, "3.2.0", + PyObject *v; + v = new_previous_version(ucd_type, "3.2.0", 
get_change_3_2_0, normalization_3_2_0); - if (v != NULL) - PyModule_AddObject(m, "ucd_3_2_0", v); + if (v == NULL) { + return -1; + } + if (PyModule_AddObject(module, "ucd_3_2_0", v) < 0) { + Py_DECREF(v); + return -1; + } /* Export C API */ - v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL); - if (v != NULL) - PyModule_AddObject(m, "ucnhash_CAPI", v); - return m; + v = PyCapsule_New((void *)&unicodedata_capi, PyUnicodeData_CAPSULE_NAME, + NULL); + if (v == NULL) { + return -1; + } + if (PyModule_AddObject(module, "ucnhash_CAPI", v) < 0) { + Py_DECREF(v); + return -1; + } + return 0; +} + + +PyMODINIT_FUNC +PyInit_unicodedata(void) +{ + PyObject *module = PyModule_Create(&unicodedatamodule); + if (!module) { + return NULL; + } + + if (unicodedata_exec(module) < 0) { + Py_DECREF(module); + return NULL; + } + + return module; } + /* Local variables: c-basic-offset: 4 diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ba48d35..9058018 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6523,8 +6523,7 @@ _PyUnicode_DecodeUnicodeEscape(const char *s, s++; ch = 0xffffffff; /* in case 'getcode' messes up */ if (namelen <= INT_MAX && - ucnhash_capi->getcode(ucnhash_capi->state, NULL, - start, (int)namelen, + ucnhash_capi->getcode(start, (int)namelen, &ch, 0)) { assert(ch <= MAX_UNICODE); WRITE_CHAR(ch); diff --git a/Python/codecs.c b/Python/codecs.c index 62d1f3f..fa329ce 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -987,8 +987,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */ c = PyUnicode_READ_CHAR(object, i); - if (ucnhash_capi->getname(ucnhash_capi->state, NULL, - c, buffer, sizeof(buffer), 1)) { + if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { replsize = 1+1+1+(int)strlen(buffer)+1; } else if (c >= 0x10000) { @@ -1011,8 +1010,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) i < end; ++i) { 
c = PyUnicode_READ_CHAR(object, i); *outp++ = '\\'; - if (ucnhash_capi->getname(ucnhash_capi->state, NULL, - c, buffer, sizeof(buffer), 1)) { + if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { *outp++ = 'N'; *outp++ = '{'; strcpy((char *)outp, buffer); |