summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/internal/pycore_codecs.h31
-rw-r--r--Include/internal/pycore_interp.h6
-rw-r--r--Objects/unicodeobject.c6
-rw-r--r--Python/codecs.c150
-rw-r--r--Python/pystate.c4
-rw-r--r--Tools/c-analyzer/cpython/ignored.tsv2
6 files changed, 120 insertions, 79 deletions
diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h
index a2a7151..5e2d5c5 100644
--- a/Include/internal/pycore_codecs.h
+++ b/Include/internal/pycore_codecs.h
@@ -8,6 +8,17 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
+#include "pycore_lock.h" // PyMutex
+
+/* Initialize codecs-related state for the given interpreter, including
+ registering the first codec search function. Must be called before any other
+ PyCodec-related functions, and while only one thread is active. */
+extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp);
+
+/* Finalize codecs-related state for the given interpreter. No PyCodec-related
+ functions other than PyCodec_Unregister() may be called after this. */
+extern void _PyCodec_Fini(PyInterpreterState *interp);
+
extern PyObject* _PyCodec_Lookup(const char *encoding);
/* Text codec specific encoding and decoding API.
@@ -48,6 +59,26 @@ extern PyObject* _PyCodecInfo_GetIncrementalEncoder(
PyObject *codec_info,
const char *errors);
+// Per-interpreter state used by codecs.c.
+struct codecs_state {
+ // A list of callable objects used to search for codecs.
+ PyObject *search_path;
+
+ // A dict mapping codec names to codecs returned from a callable in
+ // search_path.
+ PyObject *search_cache;
+
+ // A dict mapping error handling strategies to functions to implement them.
+ PyObject *error_registry;
+
+#ifdef Py_GIL_DISABLED
+ // Used to safely delete a specific item from search_path.
+ PyMutex search_path_mutex;
+#endif
+
+ // Whether or not the rest of the state is initialized.
+ int initialized;
+};
#ifdef __cplusplus
}
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index d38959e..a2aec6d 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -14,6 +14,7 @@ extern "C" {
#include "pycore_atexit.h" // struct atexit_state
#include "pycore_ceval_state.h" // struct _ceval_state
#include "pycore_code.h" // struct callable_cache
+#include "pycore_codecs.h" // struct codecs_state
#include "pycore_context.h" // struct _Py_context_state
#include "pycore_crossinterp.h" // struct _xidregistry
#include "pycore_dict_state.h" // struct _Py_dict_state
@@ -182,10 +183,7 @@ struct _is {
possible to facilitate out-of-process observability
tools. */
- PyObject *codec_search_path;
- PyObject *codec_search_cache;
- PyObject *codec_error_registry;
- int codecs_initialized;
+ struct codecs_state codecs;
PyConfig config;
unsigned long feature_flags;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2c259b7..67b1282 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -15441,7 +15441,11 @@ init_fs_encoding(PyThreadState *tstate)
PyStatus
_PyUnicode_InitEncodings(PyThreadState *tstate)
{
- PyStatus status = init_fs_encoding(tstate);
+ PyStatus status = _PyCodec_InitRegistry(tstate->interp);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
+ }
+ status = init_fs_encoding(tstate);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
diff --git a/Python/codecs.c b/Python/codecs.c
index d8fe7b2..bed2453 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -11,6 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
#include "Python.h"
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
+#include "pycore_lock.h" // PyMutex
#include "pycore_pyerrors.h" // _PyErr_FormatNote()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
@@ -19,24 +20,10 @@ const char *Py_hexdigits = "0123456789abcdef";
/* --- Codec Registry ----------------------------------------------------- */
-/* Import the standard encodings package which will register the first
- codec search function.
-
- This is done in a lazy way so that the Unicode implementation does
- not downgrade startup time of scripts not needing it.
-
- ImportErrors are silently ignored by this function. Only one try is
- made.
-
-*/
-
-static int _PyCodecRegistry_Init(void); /* Forward */
-
int PyCodec_Register(PyObject *search_function)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
- goto onError;
+ assert(interp->codecs.initialized);
if (search_function == NULL) {
PyErr_BadArgument();
goto onError;
@@ -45,7 +32,14 @@ int PyCodec_Register(PyObject *search_function)
PyErr_SetString(PyExc_TypeError, "argument must be callable");
goto onError;
}
- return PyList_Append(interp->codec_search_path, search_function);
+#ifdef Py_GIL_DISABLED
+ PyMutex_Lock(&interp->codecs.search_path_mutex);
+#endif
+ int ret = PyList_Append(interp->codecs.search_path, search_function);
+#ifdef Py_GIL_DISABLED
+ PyMutex_Unlock(&interp->codecs.search_path_mutex);
+#endif
+ return ret;
onError:
return -1;
@@ -55,22 +49,34 @@ int
PyCodec_Unregister(PyObject *search_function)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- PyObject *codec_search_path = interp->codec_search_path;
- /* Do nothing if codec_search_path is not created yet or was cleared. */
- if (codec_search_path == NULL) {
+ if (interp->codecs.initialized != 1) {
+ /* Do nothing if codecs state was cleared (only possible during
+ interpreter shutdown). */
return 0;
}
+ PyObject *codec_search_path = interp->codecs.search_path;
assert(PyList_CheckExact(codec_search_path));
- Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
- for (Py_ssize_t i = 0; i < n; i++) {
- PyObject *item = PyList_GET_ITEM(codec_search_path, i);
+ for (Py_ssize_t i = 0; i < PyList_GET_SIZE(codec_search_path); i++) {
+#ifdef Py_GIL_DISABLED
+ PyMutex_Lock(&interp->codecs.search_path_mutex);
+#endif
+ PyObject *item = PyList_GetItemRef(codec_search_path, i);
+ int ret = 1;
if (item == search_function) {
- if (interp->codec_search_cache != NULL) {
- assert(PyDict_CheckExact(interp->codec_search_cache));
- PyDict_Clear(interp->codec_search_cache);
- }
- return PyList_SetSlice(codec_search_path, i, i+1, NULL);
+ // We hold a reference to the item, so its destructor can't run
+ // while we hold search_path_mutex.
+ ret = PyList_SetSlice(codec_search_path, i, i+1, NULL);
+ }
+#ifdef Py_GIL_DISABLED
+ PyMutex_Unlock(&interp->codecs.search_path_mutex);
+#endif
+ Py_DECREF(item);
+ if (ret != 1) {
+ assert(interp->codecs.search_cache != NULL);
+ assert(PyDict_CheckExact(interp->codecs.search_cache));
+ PyDict_Clear(interp->codecs.search_cache);
+ return ret;
}
}
return 0;
@@ -132,9 +138,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
}
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) {
- return NULL;
- }
+ assert(interp->codecs.initialized);
/* Convert the encoding to a normalized Python string: all
characters are converted to lower case, spaces and hyphens are
@@ -147,7 +151,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
/* First, try to lookup the name in the registry dictionary */
PyObject *result;
- if (PyDict_GetItemRef(interp->codec_search_cache, v, &result) < 0) {
+ if (PyDict_GetItemRef(interp->codecs.search_cache, v, &result) < 0) {
goto onError;
}
if (result != NULL) {
@@ -156,7 +160,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
}
/* Next, scan the search functions in order of registration */
- const Py_ssize_t len = PyList_Size(interp->codec_search_path);
+ const Py_ssize_t len = PyList_Size(interp->codecs.search_path);
if (len < 0)
goto onError;
if (len == 0) {
@@ -170,14 +174,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
for (i = 0; i < len; i++) {
PyObject *func;
- func = PyList_GetItem(interp->codec_search_path, i);
+ func = PyList_GetItemRef(interp->codecs.search_path, i);
if (func == NULL)
goto onError;
result = PyObject_CallOneArg(func, v);
+ Py_DECREF(func);
if (result == NULL)
goto onError;
if (result == Py_None) {
- Py_DECREF(result);
+ Py_CLEAR(result);
continue;
}
if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
@@ -188,7 +193,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
}
break;
}
- if (i == len) {
+ if (result == NULL) {
/* XXX Perhaps we should cache misses too ? */
PyErr_Format(PyExc_LookupError,
"unknown encoding: %s", encoding);
@@ -196,7 +201,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
}
/* Cache and return the result */
- if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
+ if (PyDict_SetItem(interp->codecs.search_cache, v, result) < 0) {
Py_DECREF(result);
goto onError;
}
@@ -600,13 +605,12 @@ PyObject *_PyCodec_DecodeText(PyObject *object,
int PyCodec_RegisterError(const char *name, PyObject *error)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
- return -1;
+ assert(interp->codecs.initialized);
if (!PyCallable_Check(error)) {
PyErr_SetString(PyExc_TypeError, "handler must be callable");
return -1;
}
- return PyDict_SetItemString(interp->codec_error_registry,
+ return PyDict_SetItemString(interp->codecs.error_registry,
name, error);
}
@@ -616,13 +620,12 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
PyObject *PyCodec_LookupError(const char *name)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
- return NULL;
+ assert(interp->codecs.initialized);
if (name==NULL)
name = "strict";
PyObject *handler;
- if (PyDict_GetItemStringRef(interp->codec_error_registry, name, &handler) < 0) {
+ if (PyDict_GetItemStringRef(interp->codecs.error_registry, name, &handler) < 0) {
return NULL;
}
if (handler == NULL) {
@@ -1375,7 +1378,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateEscapeErrors(exc);
}
-static int _PyCodecRegistry_Init(void)
+PyStatus
+_PyCodec_InitRegistry(PyInterpreterState *interp)
{
static struct {
const char *name;
@@ -1463,45 +1467,51 @@ static int _PyCodecRegistry_Init(void)
}
};
- PyInterpreterState *interp = _PyInterpreterState_GET();
- PyObject *mod;
-
- if (interp->codec_search_path != NULL)
- return 0;
-
- interp->codec_search_path = PyList_New(0);
- if (interp->codec_search_path == NULL) {
- return -1;
+ assert(interp->codecs.initialized == 0);
+ interp->codecs.search_path = PyList_New(0);
+ if (interp->codecs.search_path == NULL) {
+ return PyStatus_NoMemory();
}
-
- interp->codec_search_cache = PyDict_New();
- if (interp->codec_search_cache == NULL) {
- return -1;
+ interp->codecs.search_cache = PyDict_New();
+ if (interp->codecs.search_cache == NULL) {
+ return PyStatus_NoMemory();
}
-
- interp->codec_error_registry = PyDict_New();
- if (interp->codec_error_registry == NULL) {
- return -1;
+ interp->codecs.error_registry = PyDict_New();
+ if (interp->codecs.error_registry == NULL) {
+ return PyStatus_NoMemory();
}
-
for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
- if (!func) {
- return -1;
+ if (func == NULL) {
+ return PyStatus_NoMemory();
}
- int res = PyCodec_RegisterError(methods[i].name, func);
+ int res = PyDict_SetItemString(interp->codecs.error_registry,
+ methods[i].name, func);
Py_DECREF(func);
- if (res) {
- return -1;
+ if (res < 0) {
+ return PyStatus_Error("Failed to insert into codec error registry");
}
}
- mod = PyImport_ImportModule("encodings");
+ interp->codecs.initialized = 1;
+
+ // Importing `encodings' will call back into this module to register codec
+ // search functions, so this is done after everything else is initialized.
+ PyObject *mod = PyImport_ImportModule("encodings");
if (mod == NULL) {
- return -1;
+ return PyStatus_Error("Failed to import encodings module");
}
Py_DECREF(mod);
- interp->codecs_initialized = 1;
- return 0;
+
+ return PyStatus_Ok();
+}
+
+void
+_PyCodec_Fini(PyInterpreterState *interp)
+{
+ Py_CLEAR(interp->codecs.search_path);
+ Py_CLEAR(interp->codecs.search_cache);
+ Py_CLEAR(interp->codecs.error_registry);
+ interp->codecs.initialized = 0;
}
diff --git a/Python/pystate.c b/Python/pystate.c
index 3d6e76e..f442d87 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -843,9 +843,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
}
PyConfig_Clear(&interp->config);
- Py_CLEAR(interp->codec_search_path);
- Py_CLEAR(interp->codec_search_cache);
- Py_CLEAR(interp->codec_error_registry);
+ _PyCodec_Fini(interp);
assert(interp->imports.modules == NULL);
assert(interp->imports.modules_by_index == NULL);
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index 87b695d..c4dbdcf 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -344,7 +344,7 @@ Python/ceval.c - _PyEval_BinaryOps -
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
Python/codecs.c - Py_hexdigits -
Python/codecs.c - ucnhash_capi -
-Python/codecs.c _PyCodecRegistry_Init methods -
+Python/codecs.c _PyCodec_InitRegistry methods -
Python/compile.c - NO_LABEL -
Python/compile.c - NO_LOCATION -
Python/dynload_shlib.c - _PyImport_DynLoadFiletab -