From 17c61045c51512add61a9e75e9c7343cf4e4fb82 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 22 Oct 2021 17:20:03 -0600 Subject: bpo-45506: Normalize _PyPathConfig.stdlib_dir when calculated. (#29040) The recently added PyConfig.stdlib_dir was being set with ".." entries. When __file__ was added for from modules this caused a problem on out-of-tree builds. This PR fixes that by normalizing "stdlib_dir" when it is calculated in getpath.c. https://bugs.python.org/issue45506 --- Include/internal/pycore_fileutils.h | 3 ++ Lib/test/test_fileutils.py | 30 ++++++++++++ Lib/test/test_posixpath.py | 62 +++++++++++++++++------- Modules/_testinternalcapi.c | 24 ++++++++++ Modules/getpath.c | 46 +++++++++++++++--- Python/fileutils.c | 95 +++++++++++++++++++++++++++++++++++++ 6 files changed, 236 insertions(+), 24 deletions(-) create mode 100644 Lib/test/test_fileutils.py diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index ab436ae..d1caf9c 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize); extern size_t _Py_find_basename(const wchar_t *filename); +PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path, + wchar_t *buf, const size_t buf_len); + // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py new file mode 100644 index 0000000..45b3f32 --- /dev/null +++ b/Lib/test/test_fileutils.py @@ -0,0 +1,30 @@ +# Run tests for functions in Python/fileutils.c. + +import os +import os.path +import unittest +from test.support import import_helper + +# Skip this test if the _testcapi module isn't available. +_testcapi = import_helper.import_module('_testinternalcapi') + + +class PathTests(unittest.TestCase): + + def test_capi_normalize_path(self): + if os.name == 'nt': + raise unittest.SkipTest('Windows has its own helper for this') + else: + from .test_posixpath import PosixPathTest as posixdata + tests = posixdata.NORMPATH_CASES + for filename, expected in tests: + if not os.path.isabs(filename): + continue + with self.subTest(filename): + result = _testcapi.normalize_path(filename) + self.assertEqual(result, expected, + msg=f'input: {filename!r} expected output: {expected!r}') + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 8d398ec..e4d8384 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -304,25 +304,51 @@ class PosixPathTest(unittest.TestCase): for path in ('~', '~/.local', '~vstinner/'): self.assertEqual(posixpath.expanduser(path), path) + NORMPATH_CASES = [ + ("", "."), + ("/", "/"), + ("/.", "/"), + ("/./", "/"), + ("/.//.", "/"), + ("/foo", "/foo"), + ("/foo/bar", "/foo/bar"), + ("//", "//"), + ("///", "/"), + ("///foo/.//bar//", "/foo/bar"), + ("///foo/.//bar//.//..//.//baz///", "/foo/baz"), + ("///..//./foo/.//bar", "/foo/bar"), + (".", "."), + (".//.", "."), + ("..", ".."), + ("../", ".."), + ("../foo", "../foo"), + ("../../foo", "../../foo"), + ("../foo/../bar", "../bar"), + ("../../foo/../bar/./baz/boom/..", "../../bar/baz"), + ("/..", "/"), + ("/..", "/"), + ("/../", "/"), + ("/..//", "/"), + ("//..", "//"), + ("/../foo", "/foo"), + ("/../../foo", "/foo"), + ("/../foo/../", "/"), + ("/../foo/../bar", "/bar"), + ("/../../foo/../bar/./baz/boom/..", "/bar/baz"), + ("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"), + ] + def test_normpath(self): - self.assertEqual(posixpath.normpath(""), ".") - self.assertEqual(posixpath.normpath("/"), "/") - self.assertEqual(posixpath.normpath("//"), "//") - self.assertEqual(posixpath.normpath("///"), "/") - self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar") - self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"), - "/foo/baz") - self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar") - - self.assertEqual(posixpath.normpath(b""), b".") - self.assertEqual(posixpath.normpath(b"/"), b"/") - self.assertEqual(posixpath.normpath(b"//"), b"//") - self.assertEqual(posixpath.normpath(b"///"), b"/") - self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar") - self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"), - b"/foo/baz") - self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"), - b"/foo/bar") + for path, expected in self.NORMPATH_CASES: + with self.subTest(path): + result = posixpath.normpath(path) + self.assertEqual(result, expected) + + path = path.encode('utf-8') + expected = expected.encode('utf-8') + with self.subTest(path, type=bytes): + result = posixpath.normpath(path) + self.assertEqual(result, expected) @skip_if_ABSTFN_contains_backslash def test_realpath_curdir(self): diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 1ca0606..1f205b8 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -14,12 +14,14 @@ #include "Python.h" #include "pycore_atomic_funcs.h" // _Py_atomic_int_get() #include "pycore_bitutils.h" // _Py_bswap32() +#include "pycore_fileutils.h" // _Py_normalize_path #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_initconfig.h" // _Py_GetConfigsAsDict() #include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy() #include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "osdefs.h" // MAXPATHLEN static PyObject * @@ -366,6 +368,27 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) } +static PyObject * +normalize_path(PyObject *self, PyObject *filename) +{ + Py_ssize_t size = -1; + wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size); + if (encoded == NULL) { + return NULL; + } + + wchar_t buf[MAXPATHLEN + 1]; + int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf)); + PyMem_Free(encoded); + if (res != 0) { + PyErr_SetString(PyExc_ValueError, "string too long"); + return NULL; + } + + return PyUnicode_FromWideChar(buf, -1); +} + + static PyMethodDef TestMethods[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -377,6 +400,7 @@ static PyMethodDef TestMethods[] = { {"set_config", test_set_config, METH_O}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, {"test_edit_cost", test_edit_cost, METH_NOARGS}, + {"normalize_path", normalize_path, METH_O, NULL}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/getpath.c b/Modules/getpath.c index 1405023..4dbd502 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -520,6 +520,42 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig, static PyStatus +calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig) +{ + // Note that, unlike calculate_set_prefix(), here we allow a negative + // prefix_found. That means the source tree Lib dir gets used. + if (!calculate->prefix_found) { + return _PyStatus_OK(); + } + PyStatus status; + wchar_t *prefix = calculate->prefix; + if (!_Py_isabs(prefix)) { + prefix = _PyMem_RawWcsdup(prefix); + if (prefix == NULL) { + return _PyStatus_NO_MEMORY(); + } + status = absolutize(&prefix); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } + wchar_t buf[MAXPATHLEN + 1]; + int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf)); + if (prefix != calculate->prefix) { + PyMem_RawFree(prefix); + } + if (res < 0) { + return PATHLEN_ERR(); + } + pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf); + if (pathconfig->stdlib_dir == NULL) { + return _PyStatus_NO_MEMORY(); + } + return _PyStatus_OK(); +} + + +static PyStatus calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig) { wchar_t prefix[MAXPATHLEN+1]; @@ -1494,12 +1530,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig) } if (pathconfig->stdlib_dir == NULL) { - if (calculate->prefix_found) { - /* This must be done *before* calculate_set_prefix() is called. */ - pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix); - if (pathconfig->stdlib_dir == NULL) { - return _PyStatus_NO_MEMORY(); - } + /* This must be done *before* calculate_set_prefix() is called. */ + status = calculate_set_stdlib_dir(calculate, pathconfig); + if (_PyStatus_EXCEPTION(status)) { + return status; } } diff --git a/Python/fileutils.c b/Python/fileutils.c index 3d8f3a4..ac0046c 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2181,6 +2181,101 @@ _Py_find_basename(const wchar_t *filename) } +/* Remove navigation elements such as "." and "..". + + This is mostly a C implementation of posixpath.normpath(). + Return 0 on success. Return -1 if "orig" is too big for the buffer. */ +int +_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) +{ + assert(path && *path != L'\0'); + assert(*path == SEP); // an absolute path + if (wcslen(path) + 1 >= buf_len) { + return -1; + } + + int dots = -1; + int check_leading = 1; + const wchar_t *buf_start = buf; + wchar_t *buf_next = buf; + // The resulting filename will never be longer than path. + for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) { + wchar_t c = *remainder; + buf_next[0] = c; + buf_next++; + if (c == SEP) { + assert(dots <= 2); + if (dots == 2) { + // Turn "/x/y/../z" into "/x/z". + buf_next -= 4; // "/../" + assert(*buf_next == SEP); + // We cap it off at the root, so "/../spam" becomes "/spam". + if (buf_next == buf_start) { + buf_next++; + } + else { + // Move to the previous SEP in the buffer. + while (*(buf_next - 1) != SEP) { + assert(buf_next != buf_start); + buf_next--; + } + } + } + else if (dots == 1) { + // Turn "/./" into "/". + buf_next -= 2; // "./" + assert(*(buf_next - 1) == SEP); + } + else if (dots == 0) { + // Turn "//" into "/". + buf_next--; + assert(*(buf_next - 1) == SEP); + if (check_leading) { + if (buf_next - 1 == buf && *(remainder + 1) != SEP) { + // Leave a leading "//" alone, unless "///...". + buf_next++; + buf_start++; + } + check_leading = 0; + } + } + dots = 0; + } + else { + check_leading = 0; + if (dots >= 0) { + if (c == L'.' && dots < 2) { + dots++; + } + else { + dots = -1; + } + } + } + } + if (dots >= 0) { + // Strip any trailing dots and trailing slash. + buf_next -= dots + 1; // "/" or "/." or "/.." + assert(*buf_next == SEP); + if (buf_next == buf_start) { + // Leave the leading slash for root. + buf_next++; + } + else { + if (dots == 2) { + // Move to the previous SEP in the buffer. + do { + assert(buf_next != buf_start); + buf_next--; + } while (*(buf_next) != SEP); + } + } + } + *buf_next = L'\0'; + return 0; +} + + /* Get the current directory. buflen is the buffer size in wide characters including the null character. Decode the path from the locale encoding. -- cgit v0.12