From 07f2cee93f1b619650403981c455f47bfed8d818 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 8 Jan 2021 00:15:22 +0100 Subject: bpo-42846: Convert CJK codec extensions to multiphase init (GH-24157) Convert the 6 CJK codec extension modules (_codecs_cn, _codecs_hk, _codecs_iso2022, _codecs_jp, _codecs_kr and _codecs_tw) to the multiphase initialization API (PEP 489). Remove getmultibytecodec() local cache: always import _multibytecodec. It should be uncommon to get a codec. For example, this function is only called once per CJK codec module. Fix a reference leak in register_maps() error path. --- Lib/test/test_multibytecodec.py | 26 ++++++++- .../2021-01-07-23-31-17.bpo-42846.kukDjw.rst | 3 + Modules/cjkcodecs/cjkcodecs.h | 68 +++++++++++++--------- 3 files changed, 66 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 7c3b67f..3efa150 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -3,11 +3,15 @@ # Unit test for multibytecodec itself # +import _multibytecodec +import codecs +import io +import sys +import textwrap +import unittest from test import support from test.support import os_helper from test.support.os_helper import TESTFN -import unittest, io, codecs, sys -import _multibytecodec ALL_CJKENCODINGS = [ # _codecs_cn @@ -205,6 +209,24 @@ class Test_IncrementalEncoder(unittest.TestCase): self.assertEqual(encoder.encode('\xff'), b'\\xff') self.assertEqual(encoder.encode('\n'), b'\n') + @support.cpython_only + def test_subinterp(self): + # bpo-42846: Test a CJK codec in a subinterpreter + import _testcapi + encoding = 'cp932' + text = "Python の開発は、1990 年ごろから開始されています。" + code = textwrap.dedent(""" + import codecs + encoding = %r + text = %r + encoder = codecs.getincrementalencoder(encoding)() + text2 = encoder.encode(text).decode(encoding) + if text2 != text: + raise ValueError(f"encoding issue: {text2!a} != {text!a}") + """) % (encoding, text) + res = _testcapi.run_in_subinterp(code) + self.assertEqual(res, 0) + class Test_IncrementalDecoder(unittest.TestCase): def test_dbcs(self): diff --git a/Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst b/Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst new file mode 100644 index 0000000..6f8a739 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst @@ -0,0 +1,3 @@ +Convert the 6 CJK codec extension modules (_codecs_cn, _codecs_hk, +_codecs_iso2022, _codecs_jp, _codecs_kr and _codecs_tw) to the multiphase +initialization API (:pep:`489`). Patch by Victor Stinner. diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index e41755b..3b89bc9 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -245,15 +245,13 @@ static const struct dbcs_map *mapping_list; static PyObject * getmultibytecodec(void) { - static PyObject *cofunc = NULL; - - if (cofunc == NULL) { - PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); - if (mod == NULL) - return NULL; - cofunc = PyObject_GetAttrString(mod, "__create_codec"); - Py_DECREF(mod); + PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); + if (mod == NULL) { + return NULL; } + + PyObject *cofunc = PyObject_GetAttrString(mod, "__create_codec"); + Py_DECREF(mod); return cofunc; } @@ -297,10 +295,6 @@ getcodec(PyObject *self, PyObject *encoding) return r; } -static struct PyMethodDef __methods[] = { - {"getcodec", (PyCFunction)getcodec, METH_O, ""}, - {NULL, NULL}, -}; static int register_maps(PyObject *module) @@ -309,12 +303,17 @@ register_maps(PyObject *module) for (h = mapping_list; h->charset[0] != '\0'; h++) { char mhname[256] = "__map_"; - int r; strcpy(mhname + sizeof("__map_") - 1, h->charset); - r = PyModule_AddObject(module, mhname, - PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); - if (r == -1) + + PyObject *capsule = PyCapsule_New((void *)h, + PyMultibyteCodec_CAPSULE_NAME, NULL); + if (capsule == NULL) { + return -1; + } + if (PyModule_AddObject(module, mhname, capsule) < 0) { + Py_DECREF(capsule); return -1; + } } return 0; } @@ -395,25 +394,36 @@ errorexit: } #endif +static int +_cjk_exec(PyObject *module) +{ + return register_maps(module); +} + + +static struct PyMethodDef _cjk_methods[] = { + {"getcodec", (PyCFunction)getcodec, METH_O, ""}, + {NULL, NULL}, +}; + +static PyModuleDef_Slot _cjk_slots[] = { + {Py_mod_exec, _cjk_exec}, + {0, NULL} +}; + #define I_AM_A_MODULE_FOR(loc) \ - static struct PyModuleDef __module = { \ + static struct PyModuleDef _cjk_module = { \ PyModuleDef_HEAD_INIT, \ - "_codecs_"#loc, \ - NULL, \ - 0, \ - __methods, \ - NULL, \ - NULL, \ - NULL, \ - NULL \ + .m_name = "_codecs_"#loc, \ + .m_size = 0, \ + .m_methods = _cjk_methods, \ + .m_slots = _cjk_slots, \ }; \ + \ PyMODINIT_FUNC \ PyInit__codecs_##loc(void) \ { \ - PyObject *m = PyModule_Create(&__module); \ - if (m != NULL) \ - (void)register_maps(m); \ - return m; \ + return PyModuleDef_Init(&_cjk_module); \ } #endif -- cgit v0.12