From df32691e6fa379aca3b7d1f1081d48c6ddd964ba Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 10 Feb 2013 12:22:07 +0200 Subject: Issue #6975: os.path.realpath() now correctly resolves multiple nested symlinks on POSIX platforms. --- Lib/posixpath.py | 90 +++++++++++++++++++++++++--------------------- Lib/test/test_posixpath.py | 55 ++++++++++++++++++++++++++++ Misc/NEWS | 3 ++ 3 files changed, 107 insertions(+), 41 deletions(-) diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 2e3625b..7e9dd85 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -390,51 +390,59 @@ def abspath(path): def realpath(filename): """Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path.""" - if isinstance(filename, bytes): + path, ok = _joinrealpath(filename[:0], filename, {}) + return abspath(path) + +# Join two paths, normalizing and eliminating any symbolic links +# encountered in the second path. +def _joinrealpath(path, rest, seen): + if isinstance(path, bytes): sep = b'/' - empty = b'' + curdir = b'.' + pardir = b'..' else: sep = '/' - empty = '' - if isabs(filename): - bits = [sep] + filename.split(sep)[1:] - else: - bits = [empty] + filename.split(sep) - - for i in range(2, len(bits)+1): - component = join(*bits[0:i]) - # Resolve symbolic links. - if islink(component): - resolved = _resolve_link(component) - if resolved is None: - # Infinite loop -- return original component + rest of the path - return abspath(join(*([component] + bits[i:]))) + curdir = '.' + pardir = '..' + + if isabs(rest): + rest = rest[1:] + path = sep + + while rest: + name, _, rest = rest.partition(sep) + if not name or name == curdir: + # current dir + continue + if name == pardir: + # parent dir + if path: + path = dirname(path) else: - newpath = join(*([resolved] + bits[i:])) - return realpath(newpath) - - return abspath(filename) - - -def _resolve_link(path): - """Internal helper function. 
Takes a path and follows symlinks - until we either arrive at something that isn't a symlink, or - encounter a path we've seen before (meaning that there's a loop). - """ - paths_seen = set() - while islink(path): - if path in paths_seen: - # Already seen this path, so we must have a symlink loop - return None - paths_seen.add(path) - # Resolve where the link points to - resolved = os.readlink(path) - if not isabs(resolved): - dir = dirname(path) - path = normpath(join(dir, resolved)) - else: - path = normpath(resolved) - return path + path = name + continue + newpath = join(path, name) + if not islink(newpath): + path = newpath + continue + # Resolve the symbolic link + if newpath in seen: + # Already seen this path + path = seen[newpath] + if path is not None: + # use cached value + continue + # The symlink is not resolved, so we must have a symlink loop. + # Return already resolved part + rest of the path unchanged. + return join(newpath, rest), False + seen[newpath] = None # not resolved symlink + path, ok = _joinrealpath(path, os.readlink(newpath), seen) + if not ok: + return join(path, rest), False + seen[newpath] = path # resolved symlink + + return path, True + supports_unicode_filenames = (sys.platform == 'darwin') diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 599c85a..430a41c 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -375,6 +375,22 @@ class PosixPathTest(unittest.TestCase): self.assertEqual(realpath(ABSTFN+"1"), ABSTFN+"1") self.assertEqual(realpath(ABSTFN+"2"), ABSTFN+"2") + self.assertEqual(realpath(ABSTFN+"1/x"), ABSTFN+"1/x") + self.assertEqual(realpath(ABSTFN+"1/.."), dirname(ABSTFN)) + self.assertEqual(realpath(ABSTFN+"1/../x"), dirname(ABSTFN) + "/x") + os.symlink(ABSTFN+"x", ABSTFN+"y") + self.assertEqual(realpath(ABSTFN+"1/../" + basename(ABSTFN) + "y"), + ABSTFN + "y") + self.assertEqual(realpath(ABSTFN+"1/../" + basename(ABSTFN) + "1"), + ABSTFN + "1") + + 
os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a") + self.assertEqual(realpath(ABSTFN+"a"), ABSTFN+"a/b") + + os.symlink("../" + basename(dirname(ABSTFN)) + "/" + + basename(ABSTFN) + "c", ABSTFN+"c") + self.assertEqual(realpath(ABSTFN+"c"), ABSTFN+"c") + # Test using relative path as well. os.chdir(dirname(ABSTFN)) self.assertEqual(realpath(basename(ABSTFN)), ABSTFN) @@ -383,6 +399,45 @@ class PosixPathTest(unittest.TestCase): support.unlink(ABSTFN) support.unlink(ABSTFN+"1") support.unlink(ABSTFN+"2") + support.unlink(ABSTFN+"y") + support.unlink(ABSTFN+"c") + + @unittest.skipUnless(hasattr(os, "symlink"), + "Missing symlink implementation") + @skip_if_ABSTFN_contains_backslash + def test_realpath_repeated_indirect_symlinks(self): + # Issue #6975. + try: + os.mkdir(ABSTFN) + os.symlink('../' + basename(ABSTFN), ABSTFN + '/self') + os.symlink('self/self/self', ABSTFN + '/link') + self.assertEqual(realpath(ABSTFN + '/link'), ABSTFN) + finally: + support.unlink(ABSTFN + '/self') + support.unlink(ABSTFN + '/link') + safe_rmdir(ABSTFN) + + @unittest.skipUnless(hasattr(os, "symlink"), + "Missing symlink implementation") + @skip_if_ABSTFN_contains_backslash + def test_realpath_deep_recursion(self): + depth = 10 + old_path = abspath('.') + try: + os.mkdir(ABSTFN) + for i in range(depth): + os.symlink('/'.join(['%d' % i] * 10), ABSTFN + '/%d' % (i + 1)) + os.symlink('.', ABSTFN + '/0') + self.assertEqual(realpath(ABSTFN + '/%d' % depth), ABSTFN) + + # Test using relative path as well. 
+ os.chdir(ABSTFN) + self.assertEqual(realpath('%d' % depth), ABSTFN) + finally: + os.chdir(old_path) + for i in range(depth + 1): + support.unlink(ABSTFN + '/%d' % i) + safe_rmdir(ABSTFN) @unittest.skipUnless(hasattr(os, "symlink"), "Missing symlink implementation") diff --git a/Misc/NEWS b/Misc/NEWS index 8fe5e16..cef6edb 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -218,6 +218,9 @@ Core and Builtins Library ------- +- Issue #6975: os.path.realpath() now correctly resolves multiple nested + symlinks on POSIX platforms. + - Issue #17156: pygettext.py now uses an encoding of source file and correctly writes and escapes non-ascii characters. -- cgit v0.12