summary refs log tree commit diff stats
path: root/Modules
diff options
context:
space:
mode:
author Erlend E. Aasland <erlend.aasland@protonmail.com> 2023-04-17 00:41:25 (GMT)
committer GitHub <noreply@github.com> 2023-04-17 00:41:25 (GMT)
commit 217911ede5d52b02b2e3c9222439e1ea08545291 (patch)
tree c5ce2f736752084bece9e383458b443df69b6b97 /Modules
parent ff3303e49c13495d8d9cf1dc0cf0624bbda1d3ae (diff)
download cpython-217911ede5d52b02b2e3c9222439e1ea08545291.zip
cpython-217911ede5d52b02b2e3c9222439e1ea08545291.tar.gz
cpython-217911ede5d52b02b2e3c9222439e1ea08545291.tar.bz2
gh-103583: Add codecs and maps to _codecs_* module state (#103540)
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 4
-rw-r--r-- Modules/cjkcodecs/_codecs_hk.c 5
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 9
-rw-r--r-- Modules/cjkcodecs/_codecs_jp.c 13
-rw-r--r-- Modules/cjkcodecs/_codecs_kr.c 4
-rw-r--r-- Modules/cjkcodecs/_codecs_tw.c 4
-rw-r--r-- Modules/cjkcodecs/cjkcodecs.h 175
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 41
8 files changed, 161 insertions, 94 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index 8a62f7e..e2c7908 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -453,14 +453,14 @@ DECODER(hz)
}
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(4)
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(4)
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
index 4f21569..43593b8 100644
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -177,14 +177,13 @@ DECODER(big5hkscs)
return 0;
}
-
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(3)
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(1)
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 7394cf6..cf34752 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -1119,18 +1119,19 @@ static const struct iso2022_designation iso2022_jp_ext_designations[] = {
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(0)
/* no mapping table here */
END_MAPPINGS_LIST
-#define ISO2022_CODEC(variation) { \
+#define ISO2022_CODEC(variation) \
+NEXT_CODEC = (MultibyteCodec){ \
"iso2022_" #variation, \
&iso2022_##variation##_config, \
iso2022_codec_init, \
_STATEFUL_METHODS(iso2022) \
-},
+};
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(7)
ISO2022_CODEC(kr)
ISO2022_CODEC(jp)
ISO2022_CODEC(jp_1)
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index 3a33295..7a8b78a 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -733,7 +733,7 @@ DECODER(shift_jis_2004)
}
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(11)
MAPPING_DECONLY(jisx0208)
MAPPING_DECONLY(jisx0212)
MAPPING_ENCONLY(jisxcommon)
@@ -747,14 +747,19 @@ BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
-BEGIN_CODECS_LIST
+#define CODEC_CUSTOM(NAME, N, METH) \
+ NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)};
+
+BEGIN_CODECS_LIST(7)
CODEC_STATELESS(shift_jis)
CODEC_STATELESS(cp932)
CODEC_STATELESS(euc_jp)
CODEC_STATELESS(shift_jis_2004)
CODEC_STATELESS(euc_jis_2004)
- { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
- { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
+ CODEC_CUSTOM("euc_jisx0213", 2000, euc_jis_2004)
+ CODEC_CUSTOM("shift_jisx0213", 2000, shift_jis_2004)
END_CODECS_LIST
+#undef CODEC_CUSTOM
+
I_AM_A_MODULE_FOR(jp)
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index 72641e4..fd9a9fd 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -453,13 +453,13 @@ DECODER(johab)
#undef FILL
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(3)
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(3)
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)
diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c
index 722b26b..3e44099 100644
--- a/Modules/cjkcodecs/_codecs_tw.c
+++ b/Modules/cjkcodecs/_codecs_tw.c
@@ -130,12 +130,12 @@ DECODER(cp950)
-BEGIN_MAPPINGS_LIST
+BEGIN_MAPPINGS_LIST(2)
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
-BEGIN_CODECS_LIST
+BEGIN_CODECS_LIST(2)
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index d9aeec2..646a9fd 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -60,8 +60,20 @@ struct pair_encodemap {
DBCHAR code;
};
-static const MultibyteCodec *codec_list;
-static const struct dbcs_map *mapping_list;
+typedef struct {
+ int num_mappings;
+ int num_codecs;
+ struct dbcs_map *mapping_list;
+ MultibyteCodec *codec_list;
+} cjkcodecs_module_state;
+
+static inline cjkcodecs_module_state *
+get_module_state(PyObject *mod)
+{
+ void *state = PyModule_GetState(mod);
+ assert(state != NULL);
+ return (cjkcodecs_module_state *)state;
+}
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)
@@ -202,16 +214,42 @@ static const struct dbcs_map *mapping_list;
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
-#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
-#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
-#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
-#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
-#define END_MAPPINGS_LIST \
- {"", NULL, NULL} }; \
- static const struct dbcs_map *mapping_list = \
- (const struct dbcs_map *)_mapping_list;
+#define BEGIN_MAPPINGS_LIST(NUM) \
+static int \
+add_mappings(cjkcodecs_module_state *st) \
+{ \
+ int idx = 0; \
+ (void)idx; \
+ st->num_mappings = NUM; \
+ st->mapping_list = PyMem_Calloc(NUM, sizeof(struct dbcs_map)); \
+ if (st->mapping_list == NULL) { \
+ return -1; \
+ }
+
+#define MAPPING_ENCONLY(enc) \
+ st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, NULL};
+#define MAPPING_DECONLY(enc) \
+ st->mapping_list[idx++] = (struct dbcs_map){#enc, NULL, (void*)enc##_decmap};
+#define MAPPING_ENCDEC(enc) \
+ st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, (void*)enc##_decmap};
+
+#define END_MAPPINGS_LIST \
+ assert(st->num_mappings == idx); \
+ return 0; \
+}
+
+#define BEGIN_CODECS_LIST(NUM) \
+static int \
+add_codecs(cjkcodecs_module_state *st) \
+{ \
+ int idx = 0; \
+ (void)idx; \
+ st->num_codecs = NUM; \
+ st->codec_list = PyMem_Calloc(NUM, sizeof(MultibyteCodec)); \
+ if (st->codec_list == NULL) { \
+ return -1; \
+ }
-#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
@@ -222,23 +260,21 @@ static const struct dbcs_map *mapping_list;
#define _STATELESS_METHODS(enc) \
enc##_encode, NULL, NULL, \
enc##_decode, NULL, NULL,
-#define CODEC_STATEFUL(enc) { \
- #enc, NULL, NULL, \
- _STATEFUL_METHODS(enc) \
-},
-#define CODEC_STATELESS(enc) { \
- #enc, NULL, NULL, \
- _STATELESS_METHODS(enc) \
-},
-#define CODEC_STATELESS_WINIT(enc) { \
- #enc, NULL, \
- enc##_codec_init, \
- _STATELESS_METHODS(enc) \
-},
-#define END_CODECS_LIST \
- {"", NULL,} }; \
- static const MultibyteCodec *codec_list = \
- (const MultibyteCodec *)_codec_list;
+
+#define NEXT_CODEC \
+ st->codec_list[idx++]
+
+#define CODEC_STATEFUL(enc) \
+ NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)};
+#define CODEC_STATELESS(enc) \
+ NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)};
+#define CODEC_STATELESS_WINIT(enc) \
+ NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)};
+
+#define END_CODECS_LIST \
+ assert(st->num_codecs == idx); \
+ return 0; \
+}
@@ -249,53 +285,70 @@ getmultibytecodec(void)
}
static PyObject *
-getcodec(PyObject *self, PyObject *encoding)
+_getcodec(const MultibyteCodec *codec)
{
- PyObject *codecobj, *r, *cofunc;
- const MultibyteCodec *codec;
- const char *enc;
-
- if (!PyUnicode_Check(encoding)) {
- PyErr_SetString(PyExc_TypeError,
- "encoding name must be a string.");
+ PyObject *cofunc = getmultibytecodec();
+ if (cofunc == NULL) {
return NULL;
}
- enc = PyUnicode_AsUTF8(encoding);
- if (enc == NULL)
- return NULL;
- cofunc = getmultibytecodec();
- if (cofunc == NULL)
+ PyObject *codecobj = PyCapsule_New((void *)codec,
+ PyMultibyteCodec_CAPSULE_NAME,
+ NULL);
+ if (codecobj == NULL) {
+ Py_DECREF(cofunc);
return NULL;
+ }
- for (codec = codec_list; codec->encoding[0]; codec++)
- if (strcmp(codec->encoding, enc) == 0)
- break;
+ PyObject *res = PyObject_CallOneArg(cofunc, codecobj);
+ Py_DECREF(codecobj);
+ Py_DECREF(cofunc);
+ return res;
+}
- if (codec->encoding[0] == '\0') {
- PyErr_SetString(PyExc_LookupError,
- "no such codec is supported.");
+static PyObject *
+getcodec(PyObject *self, PyObject *encoding)
+{
+ if (!PyUnicode_Check(encoding)) {
+ PyErr_SetString(PyExc_TypeError,
+ "encoding name must be a string.");
return NULL;
}
-
- codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
- if (codecobj == NULL)
+ const char *enc = PyUnicode_AsUTF8(encoding);
+ if (enc == NULL) {
return NULL;
+ }
- r = PyObject_CallOneArg(cofunc, codecobj);
- Py_DECREF(codecobj);
- Py_DECREF(cofunc);
+ cjkcodecs_module_state *st = get_module_state(self);
+ for (int i = 0; i < st->num_codecs; i++) {
+ const MultibyteCodec *codec = &st->codec_list[i];
+ if (strcmp(codec->encoding, enc) == 0) {
+ return _getcodec(codec);
+ }
+ }
- return r;
+ PyErr_SetString(PyExc_LookupError,
+ "no such codec is supported.");
+ return NULL;
}
+static int add_mappings(cjkcodecs_module_state *);
+static int add_codecs(cjkcodecs_module_state *);
static int
register_maps(PyObject *module)
{
- const struct dbcs_map *h;
+ // Init module state.
+ cjkcodecs_module_state *st = get_module_state(module);
+ if (add_mappings(st) < 0) {
+ return -1;
+ }
+ if (add_codecs(st) < 0) {
+ return -1;
+ }
- for (h = mapping_list; h->charset[0] != '\0'; h++) {
+ for (int i = 0; i < st->num_mappings; i++) {
+ const struct dbcs_map *h = &st->mapping_list[i];
char mhname[256] = "__map_";
strcpy(mhname + sizeof("__map_") - 1, h->charset);
@@ -394,6 +447,13 @@ _cjk_exec(PyObject *module)
return register_maps(module);
}
+static void
+_cjk_free(void *mod)
+{
+ cjkcodecs_module_state *st = get_module_state((PyObject *)mod);
+ PyMem_Free(st->mapping_list);
+ PyMem_Free(st->codec_list);
+}
static struct PyMethodDef _cjk_methods[] = {
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
@@ -409,9 +469,10 @@ static PyModuleDef_Slot _cjk_slots[] = {
static struct PyModuleDef _cjk_module = { \
PyModuleDef_HEAD_INIT, \
.m_name = "_codecs_"#loc, \
- .m_size = 0, \
+ .m_size = sizeof(cjkcodecs_module_state), \
.m_methods = _cjk_methods, \
.m_slots = _cjk_slots, \
+ .m_free = _cjk_free, \
}; \
\
PyMODINIT_FUNC \
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index 8564494..55778cd 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -19,26 +19,27 @@ typedef struct {
PyTypeObject *writer_type;
PyTypeObject *multibytecodec_type;
PyObject *str_write;
-} _multibytecodec_state;
+} module_state;
-static _multibytecodec_state *
-_multibytecodec_get_state(PyObject *module)
+static module_state *
+get_module_state(PyObject *module)
{
- _multibytecodec_state *state = PyModule_GetState(module);
+ module_state *state = PyModule_GetState(module);
assert(state != NULL);
return state;
}
static struct PyModuleDef _multibytecodecmodule;
-static _multibytecodec_state *
-_multibyte_codec_find_state_by_type(PyTypeObject *type)
+
+static module_state *
+find_state_by_def(PyTypeObject *type)
{
PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
assert(module != NULL);
- return _multibytecodec_get_state(module);
+ return get_module_state(module);
}
-#define clinic_get_state() _multibyte_codec_find_state_by_type(type)
+#define clinic_get_state() find_state_by_def(type)
/*[clinic input]
module _multibytecodec
class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
@@ -1040,7 +1041,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
- _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+ module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@@ -1315,7 +1316,7 @@ mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
- _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+ module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@@ -1630,7 +1631,7 @@ mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
- _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+ module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@@ -1735,7 +1736,7 @@ _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *se
PyObject *strobj)
/*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
{
- _multibytecodec_state *state = PyType_GetModuleState(cls);
+ module_state *state = PyType_GetModuleState(cls);
assert(state != NULL);
if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
return NULL;
@@ -1766,7 +1767,7 @@ _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObjec
return NULL;
}
- _multibytecodec_state *state = PyType_GetModuleState(cls);
+ module_state *state = PyType_GetModuleState(cls);
assert(state != NULL);
for (i = 0; i < PySequence_Length(lines); i++) {
/* length can be changed even within this loop */
@@ -1817,7 +1818,7 @@ _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *se
assert(PyBytes_Check(pwrt));
- _multibytecodec_state *state = PyType_GetModuleState(cls);
+ module_state *state = PyType_GetModuleState(cls);
assert(state != NULL);
if (PyBytes_Size(pwrt) > 0) {
@@ -1853,7 +1854,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
- _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
+ module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@@ -1963,7 +1964,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg)
if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
return NULL;
- _multibytecodec_state *state = _multibytecodec_get_state(module);
+ module_state *state = get_module_state(module);
self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
if (self == NULL)
return NULL;
@@ -1976,7 +1977,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg)
static int
_multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
{
- _multibytecodec_state *state = _multibytecodec_get_state(mod);
+ module_state *state = get_module_state(mod);
Py_VISIT(state->multibytecodec_type);
Py_VISIT(state->encoder_type);
Py_VISIT(state->decoder_type);
@@ -1988,7 +1989,7 @@ _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
static int
_multibytecodec_clear(PyObject *mod)
{
- _multibytecodec_state *state = _multibytecodec_get_state(mod);
+ module_state *state = get_module_state(mod);
Py_CLEAR(state->multibytecodec_type);
Py_CLEAR(state->encoder_type);
Py_CLEAR(state->decoder_type);
@@ -2022,7 +2023,7 @@ _multibytecodec_free(void *mod)
static int
_multibytecodec_exec(PyObject *mod)
{
- _multibytecodec_state *state = _multibytecodec_get_state(mod);
+ module_state *state = get_module_state(mod);
state->str_write = PyUnicode_InternFromString("write");
if (state->str_write == NULL) {
return -1;
@@ -2056,7 +2057,7 @@ static PyModuleDef_Slot _multibytecodec_slots[] = {
static struct PyModuleDef _multibytecodecmodule = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_multibytecodec",
- .m_size = sizeof(_multibytecodec_state),
+ .m_size = sizeof(module_state),
.m_methods = _multibytecodec_methods,
.m_slots = _multibytecodec_slots,
.m_traverse = _multibytecodec_traverse,