diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2022-09-30 13:38:41 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-30 13:38:41 (GMT) |
commit | 6537bc9a49d31a3dc7f5df1284285bafdd0f56ef (patch) | |
tree | a62bf9d9587b5c3ceebff37b0da326bf87be6764 /Modules | |
parent | 0fbee30f7166a5b75c738e27e506e2f125bad479 (diff) | |
download | cpython-6537bc9a49d31a3dc7f5df1284285bafdd0f56ef.zip cpython-6537bc9a49d31a3dc7f5df1284285bafdd0f56ef.tar.gz cpython-6537bc9a49d31a3dc7f5df1284285bafdd0f56ef.tar.bz2 |
gh-94526: getpath_dirname() no longer encodes the path (GH-97645)
Fix the Python path configuration used to initialize sys.path at
Python startup. Paths are no longer encoded to UTF-8/strict to avoid
encoding errors if they contain surrogate characters (bytes paths are
decoded with the surrogateescape error handler).
getpath_basename() and getpath_dirname() functions no longer encode
the path to UTF-8/strict, but work directly on Unicode strings. These
functions now use PyUnicode_FindChar() and PyUnicode_Substring() on
the Unicode path, rather than strrchr() on the encoded bytes string.
(cherry picked from commit 9f2f1dd131b912e224cd0269adde8879799686c4)
Co-authored-by: Victor Stinner <vstinner@python.org>
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/getpath.c | 23 |
1 files changed, 14 insertions, 9 deletions
diff --git a/Modules/getpath.c b/Modules/getpath.c index 9447988..be704ad 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -82,27 +82,32 @@ getpath_abspath(PyObject *Py_UNUSED(self), PyObject *args) static PyObject * getpath_basename(PyObject *Py_UNUSED(self), PyObject *args) { - const char *path; - if (!PyArg_ParseTuple(args, "s", &path)) { + PyObject *path; + if (!PyArg_ParseTuple(args, "U", &path)) { return NULL; } - const char *name = strrchr(path, SEP); - return PyUnicode_FromString(name ? name + 1 : path); + Py_ssize_t end = PyUnicode_GET_LENGTH(path); + Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1); + if (pos < 0) { + return Py_NewRef(path); + } + return PyUnicode_Substring(path, pos + 1, end); } static PyObject * getpath_dirname(PyObject *Py_UNUSED(self), PyObject *args) { - const char *path; - if (!PyArg_ParseTuple(args, "s", &path)) { + PyObject *path; + if (!PyArg_ParseTuple(args, "U", &path)) { return NULL; } - const char *name = strrchr(path, SEP); - if (!name) { + Py_ssize_t end = PyUnicode_GET_LENGTH(path); + Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1); + if (pos < 0) { return PyUnicode_FromStringAndSize(NULL, 0); } - return PyUnicode_FromStringAndSize(path, (name - path)); + return PyUnicode_Substring(path, 0, pos); } |