From 56c1f6d7edad454f382d3ecb8cdcff24ac898a50 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 28 Jul 2021 15:28:14 +0100 Subject: bpo-27827: identify a greater range of reserved filename on Windows. (GH-26698) `pathlib.PureWindowsPath.is_reserved()` now identifies as reserved filenames with trailing spaces or colons. Co-authored-by: Barney Gale Co-authored-by: Eryk Sun --- Lib/pathlib.py | 32 ++++++++++++++-------- Lib/test/test_pathlib.py | 32 ++++++++++++++++------ .../2021-06-12-21-25-35.bpo-27827.TMWh1i.rst | 2 ++ 3 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8e6eb48..621fba0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -124,16 +124,25 @@ class _WindowsFlavour(_Flavour): ext_namespace_prefix = '\\\\?\\' reserved_names = ( - {'CON', 'PRN', 'AUX', 'NUL'} | - {'COM%d' % i for i in range(1, 10)} | - {'LPT%d' % i for i in range(1, 10)} + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} | + {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'} ) # Interesting findings about extended paths: - # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported - # but '\\?\c:/a' is not - # - extended paths are always absolute; "relative" extended paths will - # fail. + # * '\\?\c:\a' is an extended path, which bypasses normal Windows API + # path processing. Thus relative paths are not resolved and slash is not + # translated to backslash. It has the native NT path limit of 32767 + # characters, but a bit less after resolving device symbolic links, + # such as '\??\C:' => '\Device\HarddiskVolume2'. + # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a + # regular name character in the object namespace. + # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems. + # The only path separator at the filesystem level is backslash. + # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and + # thus limited to MAX_PATH. + # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH, + # even with the '\\?\' prefix. def splitroot(self, part, sep=sep): first = part[0:1] @@ -195,15 +204,16 @@ class _WindowsFlavour(_Flavour): def is_reserved(self, parts): # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). - # We err on the side of caution and return True for paths which are - # not considered reserved by Windows. + # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not + # exist). We err on the side of caution and return True for paths + # which are not considered reserved by Windows. if not parts: return False if parts[0].startswith('\\\\'): # UNC paths are never reserved return False - return parts[-1].partition('.')[0].upper() in self.reserved_names + name = parts[-1].partition('.')[0].partition(':')[0].rstrip(' ') + return name.upper() in self.reserved_names def make_uri(self, path): # Under Windows, file URIs use the UTF-8 encoding. diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 54b7977..e716f4d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1282,19 +1282,35 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): self.assertIs(False, P('').is_reserved()) self.assertIs(False, P('/').is_reserved()) self.assertIs(False, P('/foo/bar').is_reserved()) + # UNC paths are never reserved. + self.assertIs(False, P('//my/share/nul/con/aux').is_reserved()) + # Case-insenstive DOS-device names are reserved. + self.assertIs(True, P('nul').is_reserved()) + self.assertIs(True, P('aux').is_reserved()) + self.assertIs(True, P('prn').is_reserved()) self.assertIs(True, P('con').is_reserved()) - self.assertIs(True, P('NUL').is_reserved()) + self.assertIs(True, P('conin$').is_reserved()) + self.assertIs(True, P('conout$').is_reserved()) + # COM/LPT + 1-9 or + superscript 1-3 are reserved. + self.assertIs(True, P('COM1').is_reserved()) + self.assertIs(True, P('LPT9').is_reserved()) + self.assertIs(True, P('com\xb9').is_reserved()) + self.assertIs(True, P('com\xb2').is_reserved()) + self.assertIs(True, P('lpt\xb3').is_reserved()) + # DOS-device name mataching ignores characters after a dot or + # a colon and also ignores trailing spaces. self.assertIs(True, P('NUL.txt').is_reserved()) - self.assertIs(True, P('com1').is_reserved()) - self.assertIs(True, P('com9.bar').is_reserved()) + self.assertIs(True, P('PRN ').is_reserved()) + self.assertIs(True, P('AUX .txt').is_reserved()) + self.assertIs(True, P('COM1:bar').is_reserved()) + self.assertIs(True, P('LPT9 :bar').is_reserved()) + # DOS-device names are only matched at the beginning + # of a path component. self.assertIs(False, P('bar.com9').is_reserved()) - self.assertIs(True, P('lpt1').is_reserved()) - self.assertIs(True, P('lpt9.bar').is_reserved()) self.assertIs(False, P('bar.lpt9').is_reserved()) - # Only the last component matters. + # Only the last path component matters. + self.assertIs(True, P('c:/baz/con/NUL').is_reserved()) self.assertIs(False, P('c:/NUL/con/baz').is_reserved()) - # UNC paths are never reserved. - self.assertIs(False, P('//my/share/nul/con/aux').is_reserved()) class PurePathTest(_BasePurePathTest, unittest.TestCase): cls = pathlib.PurePath diff --git a/Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst b/Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst new file mode 100644 index 0000000..1b8cc04 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst @@ -0,0 +1,2 @@ +:meth:`pathlib.PureWindowsPath.is_reserved` now identifies a greater range of +reserved filenames, including those with trailing spaces or colons. -- cgit v0.12