diff options
author | Miss Skeleton (bot) <31488909+miss-islington@users.noreply.github.com> | 2020-10-15 02:25:45 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-15 02:25:45 (GMT) |
commit | f07448bef48d645c8cee862b1f25a99003a6140e (patch) | |
tree | 5bc7e3eed4fbf6afc4f3f8b838df9f6559a9b7b1 | |
parent | 69f040c0009dc4ca72f487446e74fdb2f91c95dc (diff) | |
download | cpython-f07448bef48d645c8cee862b1f25a99003a6140e.zip cpython-f07448bef48d645c8cee862b1f25a99003a6140e.tar.gz cpython-f07448bef48d645c8cee862b1f25a99003a6140e.tar.bz2 |
bpo-41894: Fix UnicodeDecodeError while loading native module (GH-22466)
When running in a non-UTF-8 locale, if an error occurs while importing a
native Python module (say because a dependent share library is missing),
the error message string returned may contain non-ASCII code points
causing a UnicodeDecodeError.
PyUnicode_DecodeFSDefault is used for buffers which may contain
filesystem paths. For consistency with os.strerror(),
PyUnicode_DecodeLocale is used for buffers which contain system error
messages. While the shortname parameter is always encoded in ASCII
according to PEP 489, it is left decoded using PyUnicode_FromString to
minimize the changes and since it should not affect the decoding (albeit
_potentially_ slower).
In dynload_hpux, since the error buffer contains a message generated
from a static ASCII string and the module filesystem path,
PyUnicode_DecodeFSDefault is used instead of PyUnicode_DecodeLocale as
is used elsewhere.
* bpo-41894: Fix bugs in dynload error msg handling
For both dynload_aix and dynload_hpux, properly handle the possibility
that decoding strings may return NULL and when such an error happens,
properly decrement any previously decoded strings and return early.
In addition, in dynload_aix, ensure that we pass the decoded string
*object* pathname_ob to PyErr_SetImportError instead of the original
pathname buffer.
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
(cherry picked from commit 2d2af320d94afc6561e8f8adf174c9d3fd9065bc)
Co-authored-by: Kevin Adler <kadler@us.ibm.com>
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2020-10-02-11-35-33.bpo-41894.ffmtOt.rst | 3 | ||||
-rw-r--r-- | Python/dynload_aix.c | 14 | ||||
-rw-r--r-- | Python/dynload_hpux.c | 15 | ||||
-rw-r--r-- | Python/dynload_shlib.c | 4 |
4 files changed, 28 insertions, 8 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-02-11-35-33.bpo-41894.ffmtOt.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-02-11-35-33.bpo-41894.ffmtOt.rst new file mode 100644 index 0000000..571f5da --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-02-11-35-33.bpo-41894.ffmtOt.rst @@ -0,0 +1,3 @@ +When loading a native module and a load failure occurs, prevent a possible +UnicodeDecodeError when not running in a UTF-8 locale by decoding the load +error message using the current locale's encoding. diff --git a/Python/dynload_aix.c b/Python/dynload_aix.c index 684f10a..97f7698 100644 --- a/Python/dynload_aix.c +++ b/Python/dynload_aix.c @@ -144,10 +144,16 @@ aix_loaderror(const char *pathname) ERRBUF_APPEND(message[i]); ERRBUF_APPEND("\n"); } - errbuf[strlen(errbuf)-1] = '\0'; /* trim off last newline */ - pathname_ob = PyUnicode_FromString(pathname); - errbuf_ob = PyUnicode_FromString(errbuf); - PyErr_SetImportError(errbuf_ob, NULL, pathname); + /* Subtract 1 from the length to trim off trailing newline */ + errbuf_ob = PyUnicode_DecodeLocaleAndSize(errbuf, strlen(errbuf)-1, "surrogateescape"); + if (errbuf_ob == NULL) + return; + pathname_ob = PyUnicode_DecodeFSDefault(pathname); + if (pathname_ob == NULL) { + Py_DECREF(errbuf_ob); + return; + } + PyErr_SetImportError(errbuf_ob, NULL, pathname_ob); Py_DECREF(pathname_ob); Py_DECREF(errbuf_ob); return; diff --git a/Python/dynload_hpux.c b/Python/dynload_hpux.c index 4b964a6..e36d608 100644 --- a/Python/dynload_hpux.c +++ b/Python/dynload_hpux.c @@ -36,9 +36,20 @@ dl_funcptr _PyImport_FindSharedFuncptr(const char *prefix, char buf[256]; PyOS_snprintf(buf, sizeof(buf), "Failed to load %.200s", pathname); - PyObject *buf_ob = PyUnicode_FromString(buf); + PyObject *buf_ob = PyUnicode_DecodeFSDefault(buf); + if (buf_ob == NULL) + return NULL; PyObject *shortname_ob = PyUnicode_FromString(shortname); - PyObject *pathname_ob = PyUnicode_FromString(pathname); + if (shortname_ob == NULL) { + Py_DECREF(buf_ob); + return NULL; + } + PyObject *pathname_ob = PyUnicode_DecodeFSDefault(pathname); + if (pathname_ob == NULL) { + Py_DECREF(buf_ob); + Py_DECREF(shortname_ob); + return NULL; + } PyErr_SetImportError(buf_ob, shortname_ob, pathname_ob); Py_DECREF(buf_ob); Py_DECREF(shortname_ob); diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c index 082154d..2382889 100644 --- a/Python/dynload_shlib.c +++ b/Python/dynload_shlib.c @@ -106,7 +106,7 @@ _PyImport_FindSharedFuncptr(const char *prefix, const char *error = dlerror(); if (error == NULL) error = "unknown dlopen() error"; - error_ob = PyUnicode_FromString(error); + error_ob = PyUnicode_DecodeLocale(error, "surrogateescape"); if (error_ob == NULL) return NULL; mod_name = PyUnicode_FromString(shortname); @@ -114,7 +114,7 @@ _PyImport_FindSharedFuncptr(const char *prefix, Py_DECREF(error_ob); return NULL; } - path = PyUnicode_FromString(pathname); + path = PyUnicode_DecodeFSDefault(pathname); if (path == NULL) { Py_DECREF(error_ob); Py_DECREF(mod_name); |