From 8789add99164177f29a8cd319a834187c65ab16c Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
 <31488909+miss-islington@users.noreply.github.com>
Date: Wed, 28 Jul 2021 08:01:47 -0700
Subject: bpo-27827: identify a greater range of reserved filenames on Windows.
 (GH-26698) (GH-27421)

`pathlib.PureWindowsPath.is_reserved()` now identifies as reserved
filenames with trailing spaces or colons.

Co-authored-by: Barney Gale <barney.gale@foundry.com>
Co-authored-by: Eryk Sun <eryksun@gmail.com>
(cherry picked from commit 56c1f6d7edad454f382d3ecb8cdcff24ac898a50)
---
 Lib/pathlib.py                                     | 32 ++++++++++++++--------
 Lib/test/test_pathlib.py                           | 32 ++++++++++++++++------
 .../2021-06-12-21-25-35.bpo-27827.TMWh1i.rst       |  2 ++
 3 files changed, 47 insertions(+), 19 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 8e6eb48..621fba0 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -124,16 +124,25 @@ class _WindowsFlavour(_Flavour):
     ext_namespace_prefix = '\\\\?\\'
 
     reserved_names = (
-        {'CON', 'PRN', 'AUX', 'NUL'} |
-        {'COM%d' % i for i in range(1, 10)} |
-        {'LPT%d' % i for i in range(1, 10)}
+        {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
+        {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} |
+        {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'}
         )
 
     # Interesting findings about extended paths:
-    # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported
-    #   but '\\?\c:/a' is not
-    # - extended paths are always absolute; "relative" extended paths will
-    #   fail.
+    # * '\\?\c:\a' is an extended path, which bypasses normal Windows API
+    #   path processing. Thus relative paths are not resolved and slash is not
+    #   translated to backslash. It has the native NT path limit of 32767
+    #   characters, but a bit less after resolving device symbolic links,
+    #   such as '\??\C:' => '\Device\HarddiskVolume2'.
+    # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a
+    #   regular name character in the object namespace.
+    # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems.
+    #   The only path separator at the filesystem level is backslash.
+    # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and
+    #   thus limited to MAX_PATH.
+    # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH,
+    #   even with the '\\?\' prefix.
 
     def splitroot(self, part, sep=sep):
         first = part[0:1]
@@ -195,15 +204,16 @@ class _WindowsFlavour(_Flavour):
 
     def is_reserved(self, parts):
         # NOTE: the rules for reserved names seem somewhat complicated
-        # (e.g. r"..\NUL" is reserved but not r"foo\NUL").
-        # We err on the side of caution and return True for paths which are
-        # not considered reserved by Windows.
+        # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
+        # exist). We err on the side of caution and return True for paths
+        # which are not considered reserved by Windows.
         if not parts:
             return False
         if parts[0].startswith('\\\\'):
             # UNC paths are never reserved
             return False
-        return parts[-1].partition('.')[0].upper() in self.reserved_names
+        name = parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
+        return name.upper() in self.reserved_names
 
     def make_uri(self, path):
         # Under Windows, file URIs use the UTF-8 encoding.
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 54b7977..e716f4d 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1282,19 +1282,35 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase):
         self.assertIs(False, P('').is_reserved())
         self.assertIs(False, P('/').is_reserved())
         self.assertIs(False, P('/foo/bar').is_reserved())
+        # UNC paths are never reserved.
+        self.assertIs(False, P('//my/share/nul/con/aux').is_reserved())
+        # Case-insensitive DOS-device names are reserved.
+        self.assertIs(True, P('nul').is_reserved())
+        self.assertIs(True, P('aux').is_reserved())
+        self.assertIs(True, P('prn').is_reserved())
         self.assertIs(True, P('con').is_reserved())
-        self.assertIs(True, P('NUL').is_reserved())
+        self.assertIs(True, P('conin$').is_reserved())
+        self.assertIs(True, P('conout$').is_reserved())
+        # COM/LPT + 1-9 or + superscript 1-3 are reserved.
+        self.assertIs(True, P('COM1').is_reserved())
+        self.assertIs(True, P('LPT9').is_reserved())
+        self.assertIs(True, P('com\xb9').is_reserved())
+        self.assertIs(True, P('com\xb2').is_reserved())
+        self.assertIs(True, P('lpt\xb3').is_reserved())
+        # DOS-device name matching ignores characters after a dot or
+        # a colon and also ignores trailing spaces.
         self.assertIs(True, P('NUL.txt').is_reserved())
-        self.assertIs(True, P('com1').is_reserved())
-        self.assertIs(True, P('com9.bar').is_reserved())
+        self.assertIs(True, P('PRN  ').is_reserved())
+        self.assertIs(True, P('AUX  .txt').is_reserved())
+        self.assertIs(True, P('COM1:bar').is_reserved())
+        self.assertIs(True, P('LPT9   :bar').is_reserved())
+        # DOS-device names are only matched at the beginning
+        # of a path component.
         self.assertIs(False, P('bar.com9').is_reserved())
-        self.assertIs(True, P('lpt1').is_reserved())
-        self.assertIs(True, P('lpt9.bar').is_reserved())
         self.assertIs(False, P('bar.lpt9').is_reserved())
-        # Only the last component matters.
+        # Only the last path component matters.
+        self.assertIs(True, P('c:/baz/con/NUL').is_reserved())
         self.assertIs(False, P('c:/NUL/con/baz').is_reserved())
-        # UNC paths are never reserved.
-        self.assertIs(False, P('//my/share/nul/con/aux').is_reserved())
 
 class PurePathTest(_BasePurePathTest, unittest.TestCase):
     cls = pathlib.PurePath
diff --git a/Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst b/Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst
new file mode 100644
index 0000000..1b8cc04
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-06-12-21-25-35.bpo-27827.TMWh1i.rst
@@ -0,0 +1,2 @@
+:meth:`pathlib.PureWindowsPath.is_reserved` now identifies a greater range of
+reserved filenames, including those with trailing spaces or colons.
-- 
cgit v0.12