summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org>, 2022-09-30 12:58:30 (GMT)
committer: GitHub <noreply@github.com>, 2022-09-30 12:58:30 (GMT)
commit9f2f1dd131b912e224cd0269adde8879799686c4 (patch)
treeb9976f4716a607ec95f03d02bff872b0e33e710b /Modules
parentff54dd96cbe589635ed95c8b5b26bc768166b07d (diff)
downloadcpython-9f2f1dd131b912e224cd0269adde8879799686c4.zip
cpython-9f2f1dd131b912e224cd0269adde8879799686c4.tar.gz
cpython-9f2f1dd131b912e224cd0269adde8879799686c4.tar.bz2
gh-94526: getpath_dirname() no longer encodes the path (#97645)
Fix the Python path configuration used to initialize sys.path at Python startup. Paths are no longer encoded to UTF-8/strict, which avoids encoding errors when a path contains surrogate characters (bytes paths are decoded with the surrogateescape error handler). The getpath_basename() and getpath_dirname() functions no longer encode the path to UTF-8/strict, but work directly on Unicode strings. These functions now use PyUnicode_FindChar() and PyUnicode_Substring() on the Unicode path, rather than strrchr() on the encoded bytes string.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/getpath.c 23
1 files changed, 14 insertions, 9 deletions
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 9447988..be704ad 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -82,27 +82,32 @@ getpath_abspath(PyObject *Py_UNUSED(self), PyObject *args)
static PyObject *
getpath_basename(PyObject *Py_UNUSED(self), PyObject *args)
{
- const char *path;
- if (!PyArg_ParseTuple(args, "s", &path)) {
+ PyObject *path;
+ if (!PyArg_ParseTuple(args, "U", &path)) {
return NULL;
}
- const char *name = strrchr(path, SEP);
- return PyUnicode_FromString(name ? name + 1 : path);
+ Py_ssize_t end = PyUnicode_GET_LENGTH(path);
+ Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1);
+ if (pos < 0) {
+ return Py_NewRef(path);
+ }
+ return PyUnicode_Substring(path, pos + 1, end);
}
static PyObject *
getpath_dirname(PyObject *Py_UNUSED(self), PyObject *args)
{
- const char *path;
- if (!PyArg_ParseTuple(args, "s", &path)) {
+ PyObject *path;
+ if (!PyArg_ParseTuple(args, "U", &path)) {
return NULL;
}
- const char *name = strrchr(path, SEP);
- if (!name) {
+ Py_ssize_t end = PyUnicode_GET_LENGTH(path);
+ Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1);
+ if (pos < 0) {
return PyUnicode_FromStringAndSize(NULL, 0);
}
- return PyUnicode_FromStringAndSize(path, (name - path));
+ return PyUnicode_Substring(path, 0, pos);
}